Skip to content

Commit 77090b6

Browse files
authored
[GCU] update to paddlepaddle develop (#1423)
1 parent 9b65d57 commit 77090b6

15 files changed

+295
-225
lines changed

backends/gcu/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ cd backends/gcu
2424

2525
# 2) Before compiling, make sure the PaddlePaddle package is installed in the environment.
2626
# Installing the CPU version of PaddlePaddle is sufficient.
27-
python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
27+
python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
2828

2929
# 3) Start compiling, and submodules will be downloaded on demand during compilation.
3030
mkdir -p build && cd build
3131
export PADDLE_CUSTOM_PATH=`python -c "import re, paddle; print(re.compile('/__init__.py.*').sub('',paddle.__file__))"`
32-
cmake .. -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPY_VERSION=3.9
32+
cmake .. -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPY_VERSION=3.10
3333
make -j $(nproc)
3434

3535
# 4) The build output is placed in build/dist; install it with pip.

backends/gcu/README_cn.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ cd PaddleCustomDevice
2323
cd backends/gcu
2424

2525
# 2) 编译之前需确保环境下装有飞桨安装包,直接安装飞桨CPU版本即可
26-
python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
26+
python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
2727

2828
# 3) 编译,编译时会按需下载submodule
2929
mkdir -p build && cd build
3030
export PADDLE_CUSTOM_PATH=`python -c "import re, paddle; print(re.compile('/__init__.py.*').sub('',paddle.__file__))"`
31-
cmake .. -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPY_VERSION=3.9
31+
cmake .. -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DPY_VERSION=3.10
3232
make -j $(nproc)
3333

3434
# 4) 编译产出在build/dist路径下,使用pip安装

backends/gcu/kernels/activation_kernels.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,21 @@ void EluKernel(const Context& dev_ctx,
791791
}
792792
}
793793

794+
template <typename T, typename Context>
795+
void RoundKernel(const Context& dev_ctx,
796+
const phi::DenseTensor& x,
797+
const int decimals,
798+
phi::DenseTensor* out) {
799+
PADDLE_GCU_KERNEL_TRACE("round");
800+
if (LaunchAOTKernel()) {
801+
dev_ctx.template Alloc<T>(out);
802+
LAUNCH_TOPSATENOP(topsatenRound, dev_ctx, *out, x, decimals);
803+
804+
} else { // kernel impl base on JIT
805+
THROW_JIT_UNIMPLEMENTED();
806+
}
807+
}
808+
794809
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(logsigmoid, LogSigmoid)
795810
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(rsqrt, Rsqrt)
796811
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(log2, Log2)
@@ -805,7 +820,6 @@ DEFINE_UNARY_AOT_ACTIVATION_KERNEL(asinh, Asinh)
805820
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(atanh, Atanh)
806821
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(cosh, Cosh)
807822
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(sinh, Sinh)
808-
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(round, Round)
809823
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(tan, Tan)
810824
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(erf, Erf)
811825
DEFINE_UNARY_AOT_ACTIVATION_KERNEL(expm1, Expm1)

backends/gcu/kernels/concat_kernel.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,19 @@ void ConcatKernel(const Context& dev_ctx,
2323
const phi::Scalar& axis_scalar,
2424
phi::DenseTensor* out) {
2525
PADDLE_GCU_KERNEL_TRACE("concat");
26+
int64_t dim = axis_scalar.to<int64_t>();
27+
if (common::contain_unknown_dim(out->dims())) {
28+
std::vector<phi::MetaTensor> x_meta_vec;
29+
x_meta_vec.reserve(ins.size());
30+
std::vector<const phi::MetaTensor*> x_metas(ins.size(), nullptr);
31+
for (size_t i = 0; i < ins.size(); ++i) {
32+
x_meta_vec.emplace_back(*ins[i]);
33+
x_metas[i] = &x_meta_vec[i];
34+
}
35+
phi::MetaTensor meta_out(*out);
36+
phi::ConcatInferMeta(x_metas, dim, &meta_out);
37+
}
38+
2639
dev_ctx.template Alloc<T>(out);
2740

2841
if (LaunchAOTKernel()) {
@@ -54,7 +67,7 @@ void ConcatKernel(const Context& dev_ctx,
5467
}
5568
in_tensors.emplace_back(CreateTopsatenTensor(tensor));
5669
}
57-
int64_t dim = axis_scalar.to<int64_t>();
70+
5871
if (dim < 0 && !ins.empty()) {
5972
dim += ins[0]->dims().size();
6073
}

backends/gcu/kernels/flatten_kernel.cc

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717

1818
namespace custom_kernel {
1919
template <typename T, typename Context>
20-
void FlattenInferKernel(const Context& dev_ctx,
21-
const phi::DenseTensor& x,
22-
int start_axis UNUSED,
23-
int stop_axis UNUSED,
24-
phi::DenseTensor* out) {
25-
PADDLE_GCU_KERNEL_TRACE("flatten_infer");
20+
void FlattenKernel(const Context& dev_ctx,
21+
const phi::DenseTensor& x,
22+
int start_axis UNUSED,
23+
int stop_axis UNUSED,
24+
phi::DenseTensor* out) {
25+
PADDLE_GCU_KERNEL_TRACE("flatten");
2626
if (LaunchAOTKernel()) {
27-
VLOG(6) << "[HOST_KERNEL] Impl on host for flatten_infer";
27+
VLOG(6) << "[HOST_KERNEL] Impl on host for flatten";
2828
if (x.numel() == 0) {
2929
return;
3030
}
@@ -59,18 +59,19 @@ void FlattenInferKernel(const Context& dev_ctx,
5959
}
6060

6161
template <typename T, typename Context>
62-
void FlattenKernel(const Context& dev_ctx,
63-
const phi::DenseTensor& x,
64-
int start_axis,
65-
int stop_axis,
66-
phi::DenseTensor* out,
67-
phi::DenseTensor* xshape) {
68-
PADDLE_GCU_KERNEL_TRACE("flatten");
62+
void FlattenWithXShapeKernel(const Context& dev_ctx,
63+
const phi::DenseTensor& x,
64+
int start_axis,
65+
int stop_axis,
66+
phi::DenseTensor* out,
67+
phi::DenseTensor* xshape) {
68+
PADDLE_GCU_KERNEL_TRACE("flatten_with_xshape");
6969
if (LaunchAOTKernel()) {
70-
custom_kernel::FlattenInferKernel<T, Context>(
70+
custom_kernel::FlattenKernel<T, Context>(
7171
dev_ctx, x, start_axis, stop_axis, out);
7272

7373
} else { // kernel impl base on JIT
74+
THROW_JIT_UNIMPLEMENTED();
7475
dev_ctx.template Alloc<T>(out);
7576
dev_ctx.template Alloc<T>(xshape);
7677

@@ -161,10 +162,10 @@ PD_REGISTER_PLUGIN_KERNEL(flatten,
161162
int,
162163
int64_t) {}
163164

164-
PD_REGISTER_PLUGIN_KERNEL(flatten_infer,
165+
PD_REGISTER_PLUGIN_KERNEL(flatten_with_xshape,
165166
gcu,
166167
ALL_LAYOUT,
167-
custom_kernel::FlattenInferKernel,
168+
custom_kernel::FlattenWithXShapeKernel,
168169
phi::dtype::bfloat16,
169170
phi::dtype::float16,
170171
float,

backends/gcu/kernels/funcs/op_utils.cc

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,25 @@
1414

1515
#include "kernels/funcs/op_utils.h"
1616

17+
#include "paddle/common/flags.h"
1718
#include "paddle/phi/common/data_type.h"
1819
#include "paddle/phi/core/dense_tensor.h"
1920

21+
PHI_DECLARE_bool(use_stride_kernel);
22+
2023
namespace custom_kernel {
2124

2225
// Returns the raw data pointer of `tensor`, or nullptr when the tensor is not
// initialized.
//
// When stride kernels are disabled (FLAGS_use_stride_kernel is false) and the
// tensor's strides differ from the contiguous strides computed from its dims,
// the pointer is fetched through a temporary copy whose metadata carries the
// contiguous strides.
// NOTE(review): presumably DenseTensor::data() rejects non-contiguous
// metadata when stride kernels are off — confirm against the Paddle sources.
//
// The tensor copy and the metadata rewrite are performed only in that one
// case; the common paths read the pointer straight from `tensor`.
void *GcuDataPtr(const phi::DenseTensor &tensor) {
  if (!tensor.initialized()) {
    return nullptr;
  }

  if (!FLAGS_use_stride_kernel) {
    const auto contiguous_strides =
        phi::DenseTensorMeta::calc_strides(tensor.dims());
    const bool is_contiguous = (tensor.strides() == contiguous_strides);
    if (!is_contiguous) {
      // Copy of the tensor object with contiguous strides in its metadata;
      // `tensor` itself is left untouched.
      phi::DenseTensor contiguous_view = tensor;
      auto meta = contiguous_view.meta();
      meta.strides = contiguous_strides;
      contiguous_view.set_meta(meta);
      return const_cast<void *>(contiguous_view.data());
    }
  }

  return const_cast<void *>(tensor.data());
}

backends/gcu/kernels/logsumexp_kernel.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ namespace custom_kernel {
1919
template <typename T, typename Context>
2020
void LogsumexpKernel(const Context& dev_ctx,
2121
const phi::DenseTensor& x,
22-
const std::vector<int64_t>& axis,
22+
const std::vector<int>& axis,
2323
bool keepdim,
2424
bool reduce_all,
2525
phi::DenseTensor* out) {
2626
PADDLE_GCU_KERNEL_TRACE("logsumexp");
2727
if (LaunchAOTKernel()) {
2828
dev_ctx.template Alloc<T>(out);
2929

30-
auto reduce_axis = axis;
30+
std::vector<int64_t> reduce_axis(axis.begin(), axis.end());
3131
int64_t rank = x.dims().size();
3232
if (reduce_all || reduce_axis.empty()) {
3333
reduce_axis.assign(rank, 0);

backends/gcu/kernels/reshape_kernel.cc

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,11 @@ void InferMetaFromVecValue(const phi::DenseTensor& x,
147147
} // namespace
148148

149149
template <typename T, typename Context>
150-
void ReshapeInferKernel(const Context& dev_ctx,
151-
const phi::DenseTensor& x,
152-
const phi::IntArray& shape,
153-
phi::DenseTensor* out) {
150+
void ReshapeKernel(const Context& dev_ctx,
151+
const phi::DenseTensor& x,
152+
const phi::IntArray& shape,
153+
phi::DenseTensor* out) {
154+
PADDLE_GCU_KERNEL_TRACE("reshape");
154155
PADDLE_ENFORCE_NE(
155156
x.layout(),
156157
common::DataLayout::kNDHWC,
@@ -180,13 +181,13 @@ void ReshapeInferKernel(const Context& dev_ctx,
180181
}
181182

182183
template <typename T, typename Context>
183-
void ReshapeKernel(const Context& dev_ctx,
184-
const phi::DenseTensor& x,
185-
const phi::IntArray& shape,
186-
phi::DenseTensor* out,
187-
phi::DenseTensor* xshape) {
188-
PADDLE_GCU_KERNEL_TRACE("reshape");
189-
ReshapeInferKernel<T>(dev_ctx, x, shape, out);
184+
void ReshapeWithXShapeKernel(const Context& dev_ctx,
185+
const phi::DenseTensor& x,
186+
const phi::IntArray& shape,
187+
phi::DenseTensor* out,
188+
phi::DenseTensor* xshape) {
189+
PADDLE_GCU_KERNEL_TRACE("reshape_with_xshape");
190+
ReshapeKernel<T>(dev_ctx, x, shape, out);
190191
}
191192

192193
template <typename T, typename Context>
@@ -251,6 +252,19 @@ PD_REGISTER_PLUGIN_KERNEL(reshape,
251252
uint8_t,
252253
bool) {}
253254

255+
// Registers custom_kernel::ReshapeWithXShapeKernel under the kernel name
// `reshape_with_xshape` for the `gcu` backend with ALL_LAYOUT. The trailing
// arguments are the element types the kernel is registered for.
// NOTE(review): phi::dtype::bfloat16 is not in this list, while the
// flatten_with_xshape registration includes it — confirm this is intentional.
PD_REGISTER_PLUGIN_KERNEL(reshape_with_xshape,
256+
gcu,
257+
ALL_LAYOUT,
258+
custom_kernel::ReshapeWithXShapeKernel,
259+
float,
260+
phi::dtype::float16,
261+
double,
262+
int8_t,
263+
int16_t,
264+
int32_t,
265+
int64_t,
266+
uint8_t,
267+
bool) {}
254268
// PD_REGISTER_PLUGIN_KERNEL(reshape_grad,
255269
// gcu,
256270
// ALL_LAYOUT,

backends/gcu/tests/fuse_pass/test_custom_pass_gcu.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ def replace(x, y, z):
3636

3737
@paddle.jit.to_static(
3838
input_spec=[
39-
paddle.static.InputSpec([None, 32], "int32", "x"),
40-
paddle.static.InputSpec([None, 32], "int32", "y"),
41-
paddle.static.InputSpec([None, 32], "int32", "z"),
39+
paddle.static.InputSpec([None, 32], "float32", "x"),
40+
paddle.static.InputSpec([None, 32], "float32", "y"),
41+
paddle.static.InputSpec([None, 32], "float32", "z"),
4242
]
4343
)
4444
def func(x, y, z):
@@ -56,7 +56,8 @@ def setUp(self):
5656
paddle.utils.cpp_extension.extension_utils.load_op_meta_info_and_register_op(
5757
lib
5858
)
59-
paddle.jit.save(func, MODEL_FILE)
59+
with paddle.pir_utils.OldIrGuard():
60+
paddle.jit.save(func, MODEL_FILE)
6061

6162
def test_my_add_n(self):
6263
config = paddle.inference.Config()

backends/gcu/tests/unittests/test_diag.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,12 @@
2222
# The table retains its original format for better comparison of parameter settings.
2323
# fmt: off
2424
DIAG_CASE = [
25-
{"shape": [3], "dtype": np.float32, "offset": 0, "padding_value": 0},
26-
{"shape": [3], "dtype": np.float32, "offset": 1, "padding_value": 0},
27-
{"shape": [3], "dtype": np.float32, "offset": -1, "padding_value": 0},
28-
{"shape": [3], "dtype": np.float32, "offset": 0, "padding_value": 6},
29-
{"shape": [3], "dtype": np.float32, "offset": 1, "padding_value": 6},
30-
{"shape": [3], "dtype": np.float32, "offset": -1, "padding_value": 6},
3125
{"shape": [3, 3], "dtype": np.float32, "offset": 0, "padding_value": 0},
26+
{"shape": [3, 3], "dtype": np.float32, "offset": 1, "padding_value": 0},
27+
{"shape": [3, 3], "dtype": np.float32, "offset": -1, "padding_value": 0},
28+
{"shape": [3, 6], "dtype": np.float32, "offset": 0, "padding_value": 6},
29+
{"shape": [3, 6], "dtype": np.float32, "offset": 1, "padding_value": 6},
30+
{"shape": [3, 6], "dtype": np.float32, "offset": -1, "padding_value": 6},
3231
]
3332
# fmt: on
3433

0 commit comments

Comments
 (0)