Commit e0797a8

Merge branch 'PaddlePaddle:develop' into develop
2 parents 3b17175 + 9817aad commit e0797a8

File tree: 167 files changed (+6145 / −1668 lines changed)

Paddle

Submodule Paddle updated 4007 files

backends/custom_cpu/kernels/reshape_kernel.cc

Lines changed: 14 additions & 14 deletions

@@ -122,10 +122,10 @@ static std::vector<int64_t> ValidateShape(const std::vector<int64_t> shape,
 }
 
 template <typename T>
-void ReshapeInferKernel(const phi::Context& dev_ctx,
-                        const phi::DenseTensor& x,
-                        const phi::IntArray& shape,
-                        phi::DenseTensor* out) {
+void ReshapeKernel(const phi::Context& dev_ctx,
+                   const phi::DenseTensor& x,
+                   const phi::IntArray& shape,
+                   phi::DenseTensor* out) {
   auto x_dims = x.dims();
   auto out_dims = ValidateShape(shape.GetData(), x_dims);
   out->Resize(out_dims);
@@ -149,20 +149,20 @@ void ReshapeInferKernel(const phi::Context& dev_ctx,
 }
 
 template <typename T>
-void ReshapeKernel(const phi::Context& dev_ctx,
-                   const phi::DenseTensor& x,
-                   const phi::IntArray& shape,
-                   phi::DenseTensor* out,
-                   phi::DenseTensor* xshape) {
-  ReshapeInferKernel<T>(dev_ctx, x, shape, out);
+void ReshapeWithXShapeKernel(const phi::Context& dev_ctx,
+                             const phi::DenseTensor& x,
+                             const phi::IntArray& shape,
+                             phi::DenseTensor* out,
+                             phi::DenseTensor* xshape) {
+  ReshapeKernel<T>(dev_ctx, x, shape, out);
 }
 
 }  // namespace custom_kernel
 
-PD_BUILD_PHI_KERNEL(reshape_infer,
+PD_BUILD_PHI_KERNEL(reshape,
                     custom_cpu,
                     ALL_LAYOUT,
-                    custom_kernel::ReshapeInferKernel,
+                    custom_kernel::ReshapeKernel,
                     float,
                     double,
                     int8_t,
@@ -172,10 +172,10 @@ PD_BUILD_PHI_KERNEL(reshape_infer,
                     uint8_t,
                     bool) {}
 
-PD_BUILD_PHI_KERNEL(reshape,
+PD_BUILD_PHI_KERNEL(reshape_with_xshape,
                     custom_cpu,
                     ALL_LAYOUT,
-                    custom_kernel::ReshapeKernel,
+                    custom_kernel::ReshapeWithXShapeKernel,
                     float,
                     double,
                     int8_t,
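
For orientation, here is a minimal usage sketch (not part of the commit) of how the kernel now registered as `reshape` is reached through the regular Paddle API; it assumes the custom_cpu wheel built from this repository is installed, and the simple shape check is only for illustration.

```python
# Illustrative sketch only: exercising the custom_cpu "reshape" kernel via the
# public Paddle API. Assumes the custom_cpu plugin from this repository is
# installed alongside PaddlePaddle.
import paddle

if "custom_cpu" in (paddle.device.get_all_custom_device_type() or []):
    paddle.set_device("custom_cpu")  # route ops to the plugin's kernels

x = paddle.arange(12, dtype="float32")
y = paddle.reshape(x, [3, 4])  # dispatches to the kernel now registered as "reshape"
print(y.shape)  # [3, 4]
```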

backends/custom_cpu/tests/unittests/test_argsort_op.py

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ def forward(self):
 
 
 def create_tensor(np_data, place):
-    tensor = core.LoDTensor()
+    tensor = core.DenseTensor()
     tensor.set(np_data, place)
     return tensor
 
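
A short sketch of how the updated helper is used, assuming a Paddle 3.x build where the binding formerly exposed as `core.LoDTensor` is available as `core.DenseTensor` (which is what the test change above relies on); the sample data and place are illustrative.

```python
# Hedged example: create a DenseTensor from numpy data, mirroring the helper above.
# Assumes Paddle 3.x, where paddle.base.core exposes DenseTensor.
import numpy as np
import paddle
from paddle.base import core

def create_tensor(np_data, place):
    tensor = core.DenseTensor()  # formerly core.LoDTensor()
    tensor.set(np_data, place)
    return tensor

t = create_tensor(np.ones((2, 3), dtype="float32"), paddle.CPUPlace())
print(t.shape())  # [2, 3]
```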

backends/custom_cpu/tests/unittests/test_slice_op.py

Lines changed: 4 additions & 4 deletions

@@ -516,7 +516,7 @@ def test_bool_tensor(self):
         # self.assertTrue(np.allclose(a_1.numpy(), a[-3:3, 0:2, 2:4]))
 
 
-class TestSliceApiWithLoDTensorArray(unittest.TestCase):
+class TestSliceApiWithDenseTensorArray(unittest.TestCase):
     def setUp(self):
         self.shape = (3, 4)
         self.data = np.random.random(size=self.shape).astype("float32")
@@ -582,7 +582,7 @@ def test_case_1(self):
         main_program = base.Program()
         self.set_program_and_run(main_program, 1)
 
-        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.LOD_TENSOR)
+        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.DENSE_TENSOR)
         self.assertEqual(self.sliced_arr.shape, self.shape)
         self.assertTrue(np.array_equal(self.out, self.data))
         self.assertTrue(np.array_equal(self.g_x0, np.ones_like(self.data)))
@@ -593,7 +593,7 @@ def test_case_2(self):
         main_program = base.Program()
         self.set_program_and_run(main_program, 2)
 
-        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY)
+        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.DENSE_TENSOR_ARRAY)
         self.assertEqual(self.sliced_arr.shape, self.shape)
         self.assertTrue(
             np.array_equal(self.out, np.stack([self.data, self.data], axis=self.axis))
@@ -606,7 +606,7 @@ def test_case_3(self):
         main_program = base.Program()
         self.set_program_and_run(main_program, 3)
 
-        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY)
+        self.assertTrue(self.sliced_arr.type == core.VarDesc.VarType.DENSE_TENSOR_ARRAY)
         self.assertEqual(self.sliced_arr.shape, self.shape)
         self.assertTrue(
             np.array_equal(
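
For reference, a tiny hedged sketch of the enum rename these assertions depend on (assuming a Paddle 3.x build where the old LOD_TENSOR names have been replaced):

```python
# Assumption: Paddle 3.x exposes the renamed variable-type enum values.
from paddle.base import core

VarType = core.VarDesc.VarType
print(VarType.DENSE_TENSOR)        # replaces VarType.LOD_TENSOR
print(VarType.DENSE_TENSOR_ARRAY)  # replaces VarType.LOD_TENSOR_ARRAY
```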

backends/gcu/CMakeLists.txt

Lines changed: 19 additions & 66 deletions

@@ -17,29 +17,7 @@ cmake_minimum_required(VERSION 3.10)
 project(paddle-custom-gcu CXX C)
 set(CUSTOM_GCU_NAME "paddle-custom-gcu")
 
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
-message(STATUS "CMAKE_DEBUG CMAKE_MODULE_PATH is: ${CMAKE_MODULE_PATH}")
-option(VERSION_WITH_GIT "Use git hash to PACKAGE_VERSION" FALSE)
-
-set(TOPS_RELEASE_VERSION 3.1.0)
-if((NOT DEFINED PACKAGE_VERSION)
-   OR ("${PACKAGE_VERSION}" STREQUAL "")
-   OR ("${PACKAGE_VERSION}" STREQUAL "123.456"))
-  if(VERSION_WITH_GIT)
-    # get Paddle-custom git hash
-    execute_process(
-      COMMAND git log -1 --abbrev=7 --format=%h
-      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
-      OUTPUT_VARIABLE _tag
-      OUTPUT_STRIP_TRAILING_WHITESPACE)
-    message(STATUS "git hash: ${_tag}")
-  else()
-    string(TIMESTAMP _tag "%Y%m%d")
-  endif()
-  set(PACKAGE_VERSION ${TOPS_RELEASE_VERSION}.${_tag})
-endif()
-
-message(STATUS "package version: ${PACKAGE_VERSION}")
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 
 if(DEFINED PY_VERSION)
   message(STATUS "User define PY_VERSION: ${PY_VERSION}")
@@ -51,22 +29,15 @@ set(PYTHON_VERSION ${PY_VERSION})
 set(Python_EXECUTABLE "python${PY_VERSION}")
 message(STATUS "Python_EXECUTABLE: ${Python_EXECUTABLE}")
 
-if(NOT DEFINED ENV{PADDLE_VERSION})
-  set(ENV{PADDLE_VERSION} 3.0.0-beta1)
-endif()
-
 include(paddle)
 include(version)
 include(generic)
 include(external/gcu)
 include(external/topscc)
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_SOURCE_DIR})
 include_directories(/opt/tops/include)
 
-set(OUTPUT_PADDLE_PACKAGE_VERSION ${PADDLE_VERSION}+${PACKAGE_VERSION})
-string(REPLACE "-beta" "b" OUTPUT_PADDLE_PACKAGE_VERSION
-       "${OUTPUT_PADDLE_PACKAGE_VERSION}")
 option(WITH_KERNELS "compile with custom kernels" ON)
 option(WITH_TESTING "compile with unit testing" OFF)
 option(WITH_MKL "compile with mkl support" ON)
@@ -205,13 +176,10 @@ message(STATUS "Git commit id is: ${GIT_HASH}")
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
                ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
 
-message(STATUS "CMAKE_DEBUG Git commit id is: ${GIT_HASH}")
-message(STATUS "CMAKE_DEBUG CUSTOM_GCU_SRCS is: ${CUSTOM_GCU_SRCS}")
-message(STATUS "CMAKE_DEBUG GCU_LIBS is: ${GCU_LIBS}")
-
+# packing wheel package
 add_custom_command(
-  OUTPUT
-  ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/lib${CUSTOM_GCU_NAME}.so
+  TARGET ${CUSTOM_GCU_NAME}
+  POST_BUILD
   COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/python/
   COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/python/
   COMMAND ${CMAKE_COMMAND} -E make_directory
@@ -220,16 +188,13 @@ add_custom_command(
     ${CMAKE_COMMAND} -E copy_if_different
     ${CMAKE_CURRENT_BINARY_DIR}/lib${CUSTOM_GCU_NAME}.so
     ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/
-  COMMENT "Creating custom device directories------>>>"
-  DEPENDS ${CUSTOM_GCU_NAME})
-add_custom_target(
-  CUSTOM_GCU_NAME_shared_lib_copy ALL
-  DEPENDS
-  ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/lib${CUSTOM_GCU_NAME}.so
-)
+  COMMENT "Creating custom device directories------>>>")
 
 set(topscc_kernel_lib_targets "")
 unset(topscc_kernel_lib_targets)
+set(_passes_target_dir
+    "${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/gcu")
+file(MAKE_DIRECTORY ${_passes_target_dir})
 foreach(topscc_kernel_lib ${TOPSCC_LIBS})
   get_filename_component(topscc_kernel_lib_name ${topscc_kernel_lib} NAME_WLE)
   get_filename_component(topscc_kernel_lib_target_name ${topscc_kernel_lib}
@@ -250,29 +215,17 @@ foreach(topscc_kernel_lib ${TOPSCC_LIBS})
 endforeach()
 add_custom_target(topscc_targets ALL DEPENDS ${topscc_kernel_lib_targets})
 
-set(whl_file
-    "${CMAKE_CURRENT_BINARY_DIR}/dist/paddle_custom_gcu-${OUTPUT_PADDLE_PACKAGE_VERSION}-cp${PY_VERSION_NO_DOT}-cp${PY_VERSION_NO_DOT}-linux_x86_64.whl"
-)
-set(_passes_target_dir
-    "${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/gcu/passes")
-file(MAKE_DIRECTORY ${_passes_target_dir})
-file(GLOB passes_srcs "${CMAKE_CURRENT_SOURCE_DIR}/passes/*")
-foreach(passes_src IN LISTS passes_srcs)
-  get_filename_component(passes_file_name ${passes_src} NAME)
-  add_custom_command(
-    OUTPUT ${_passes_target_dir}/${passes_file_name}
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${passes_src}
-            ${_passes_target_dir}
-    DEPENDS ${passes_src})
-  list(APPEND passes_bin_files_list "${_passes_target_dir}/${passes_file_name}")
-endforeach()
 add_custom_command(
-  OUTPUT ${whl_file}
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp
+  COMMAND ${CMAKE_COMMAND} -E remove -f
+          ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/gcu/passes
+  COMMAND ${CMAKE_COMMAND} -E make_directory
+          ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/gcu/passes
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/passes/*
+          ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/gcu/passes
   COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py bdist_wheel
-  # COMMAND rename 's/_${TOPS_RELEASE_VERSION}/-${TOPS_RELEASE_VERSION}/g'
-  # ${CMAKE_CURRENT_BINARY_DIR}/dist/*.whl
-  DEPENDS ${CUSTOM_GCU_NAME} CUSTOM_GCU_NAME_shared_lib_copy topscc_targets
-          ${CMAKE_CURRENT_BINARY_DIR}/setup.py ${passes_bin_files_list}
+  DEPENDS ${CUSTOM_GCU_NAME}
   COMMENT "Packing whl packages------>>>")
 
-add_custom_target(python_package ALL DEPENDS ${whl_file})
+add_custom_target(python_package ALL
+                  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp)

backends/gcu/README.md

Lines changed: 33 additions & 6 deletions

@@ -7,11 +7,38 @@ Please refer to the following steps to compile, install and verify the hardware
 ## Prepare environment and source code
 
 ```bash
-# 1) Pull PaddlePaddle development docker image,and install Enflame GCU development kit.
+# 1) Pull the image. Note that this image is only for development environment
+# and does not contain precompiled PaddlePaddle installation package.
+# The build script and dockerfile of this image are located in the tools/dockerfile directory.
+ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.2.109-ubuntu20-x86_64-gcc84 /bin/bash
 
-# 2) Clone the source code.
+# 2) Refer to the following command to start the container.
+docker run --name paddle-gcu-dev -v /home:/home \
+    --network=host --ipc=host -it --privileged \
+    ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.2.109-ubuntu20-x86_64-gcc84 /bin/bash
+
+# 3) Clone the source code.
 git clone https://github.com/PaddlePaddle/PaddleCustomDevice
 cd PaddleCustomDevice
+
+# 4) Prepare the machine and initialize the environment (only required for device used for execution).
+# 4a) Get the driver: The full software package is placed in docker in advance
+# and needs to be copied to the directory outside docker, such as: /home/workspace/deps/.
+mkdir -p /home/workspace/deps/ && cp /root/TopsRider_i3x_*/TopsRider_i3x_*_deb_amd64.run /home/workspace/deps/
+
+# 4b) Verify whether the machine has Enflame S60 accelerators.
+# Check whether the following command has output in the system environment.
+# Note: You need to press Ctrl+D to exit docker.
+# The following initialization environment operations are all performed in the system environment.
+lspci | grep S60
+
+# 4c) Install the driver.
+cd /home/workspace/deps/
+bash TopsRider_i3x_*_deb_amd64.run --driver --no-auto-load
+
+# 4d) After the driver is installed, refer to the following command to re-enter docker.
+docker start paddle-gcu-dev
+docker exec -it paddle-gcu-dev bash
 ```
 
 ## PaddleCustomDevice Installation and Verification
@@ -20,7 +47,7 @@ cd PaddleCustomDevice
 
 ```bash
 # 1) Enter the hardware backend (Enflame GCU) directory.
-cd backends/gcu
+cd PaddleCustomDevice/backends/gcu
 
 # 2) Before compiling, you need to ensure that the PaddlePaddle installation package is installed in the environment.
 # Just install the PaddlePaddle CPU version directly.
@@ -47,9 +74,9 @@ python -c "import paddle; print(paddle.device.get_all_custom_device_type())"
 # 2) Check currently installed version.
 python -c "import paddle_custom_device; paddle_custom_device.gcu.version()"
 # Expect to get output like this.
-version: 0.0.0.9e03b0a
-commit: 9e03b0a42a530d07fb60e141ee618fc02595bd96
-tops-sdk: 2.5.20231128
+version: 3.0.0b1+3.1.0.20241113
+commit: f05823682bf607deb1b4adf9a9309f81225958fe
+TopsPlatform: 1.2.0.301
 
 # 3) Unit test, compiled with -DWITH_TESTING=ON and executed in the build directory.
 ctest
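
The verification steps above can be collected into one small script. This is a hedged sketch only: it assumes the GCU wheel built from this backend is installed, an S60 card is visible, and the custom device type is reported as `gcu`.

```python
# Hedged verification sketch based on the README steps above.
import paddle
import paddle_custom_device

print(paddle.device.get_all_custom_device_type())  # expect: ['gcu']
paddle_custom_device.gcu.version()                  # prints version / commit / TopsPlatform

paddle.set_device("gcu")                            # assumption: device type name is "gcu"
x = paddle.ones([2, 2])
print((x + x).numpy())                              # runs a simple op on the GCU device
```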

backends/gcu/README_cn.md

Lines changed: 29 additions & 6 deletions

@@ -7,11 +7,34 @@
 ## Environment Preparation and Source Code Sync
 
 ```bash
-# 1) Get the PaddlePaddle Docker image and install the Enflame GCU software stack.
+# 1) Pull the image. Note that this image is only a development environment and does not contain a precompiled PaddlePaddle package.
+# The build script and dockerfile of this image are located in the tools/dockerfile directory.
+ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.2.109-ubuntu20-x86_64-gcc84 /bin/bash
 
-# 2) Clone the PaddleCustomDevice source code.
+# 2) Refer to the following command to start the container.
+docker run --name paddle-gcu-dev -v /home:/home \
+    --network=host --ipc=host -it --privileged \
+    ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.2.109-ubuntu20-x86_64-gcc84 /bin/bash
+
+# 3) Clone the PaddleCustomDevice source code.
 git clone https://github.com/PaddlePaddle/PaddleCustomDevice
 cd PaddleCustomDevice
+
+# 4) Prepare the machine and initialize the environment (only required for the device used for execution).
+# 4a) Get the driver: the full software package is placed inside docker in advance and needs to be copied to a directory outside docker, e.g. /home/workspace/deps/.
+mkdir -p /home/workspace/deps/ && cp /root/TopsRider_i3x_*/TopsRider_i3x_*_deb_amd64.run /home/workspace/deps/
+
+# 4b) Verify whether the machine has an Enflame S60 accelerator: check whether the following command has output in the system environment.
+# Note: press Ctrl+D to exit docker; the following initialization steps are all performed in the system environment.
+lspci | grep S60
+
+# 4c) Install the driver.
+cd /home/workspace/deps/
+bash TopsRider_i3x_*_deb_amd64.run --driver --no-auto-load
+
+# 4d) After the driver is installed, re-enter docker with the following commands.
+docker start paddle-gcu-dev
+docker exec -it paddle-gcu-dev bash
 ```
 
 ## PaddleCustomDevice Installation and Running
@@ -20,7 +43,7 @@ cd PaddleCustomDevice
 
 ```bash
 # 1) Enter the hardware backend (Enflame GCU) directory.
-cd backends/gcu
+cd PaddleCustomDevice/backends/gcu
 
 # 2) Before compiling, make sure a PaddlePaddle package is installed; installing the PaddlePaddle CPU version is sufficient.
 python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
@@ -46,9 +69,9 @@ python -c "import paddle; print(paddle.device.get_all_custom_device_type())"
 # 2) Check the currently installed version.
 python -c "import paddle_custom_device; paddle_custom_device.gcu.version()"
 # Expect output similar to the following.
-version: 0.0.0.9e03b0a
-commit: 9e03b0a42a530d07fb60e141ee618fc02595bd96
-tops-sdk: 2.5.20231128
+version: 3.0.0b1+3.1.0.20241113
+commit: f05823682bf607deb1b4adf9a9309f81225958fe
+TopsPlatform: 1.2.0.301
 
 # 3) Unit tests: build with -DWITH_TESTING=ON and run ctest in the build directory.
 ctest

backends/gcu/backend/executor/single_op_executor.cc

Lines changed: 6 additions & 6 deletions

@@ -35,7 +35,7 @@ limitations under the License. */
 namespace backend {
 using TensorPtr = std::shared_ptr<phi::DenseTensor>;
 using phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
+using DenseTensor = phi::DenseTensor;
 
 SingleOpGcuExecutor::SingleOpGcuExecutor(
     const std::string& op_type,
@@ -67,15 +67,15 @@ void SingleOpGcuExecutor::ReleaseResource() {
 }
 
 void SingleOpGcuExecutor::RunGcuOp(const phi::CustomContext* device_context,
-                                   const std::vector<LoDTensor*>& inputs,
-                                   const std::vector<LoDTensor*>& outputs,
+                                   const std::vector<DenseTensor*>& inputs,
+                                   const std::vector<DenseTensor*>& outputs,
                                    bool tensor_split) {
   std::vector<void*> dev_inputs;
   dev_inputs.reserve(inputs.size());
   std::vector<void*> dev_outputs;
   dev_outputs.resize(outputs.size());
 
-  static LoDTensor tmp_out_tensor;
+  static DenseTensor tmp_out_tensor;
   static std::once_flag alloc_flags;
   std::call_once(alloc_flags, [&]() {
     const phi::DenseTensorMeta meta(phi::DataType::FLOAT32,
@@ -84,8 +84,8 @@ void SingleOpGcuExecutor::RunGcuOp(const phi::CustomContext* device_context,
     device_context->Alloc<float>(&tmp_out_tensor);
   });
 
-  std::vector<LoDTensor*> real_inputs;
-  std::vector<LoDTensor*> real_outputs;
+  std::vector<DenseTensor*> real_inputs;
+  std::vector<DenseTensor*> real_outputs;
   real_inputs.reserve(inputs.size());
   real_outputs.reserve(outputs.size());
 