Skip to content

Commit e3a64fc

Browse files
authored
Merge pull request #13835 from velconia/fix_reshape_op
Fix Reshape op when the input is the same as the output
2 parents 46b0b79 + aeec82a commit e3a64fc

File tree

4 files changed

+41
-4
lines changed

4 files changed

+41
-4
lines changed

paddle/fluid/framework/tensor_util.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
3636
auto size = src.numel() * SizeOfType(src.type());
3737

3838
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
39+
if (src_ptr == dst_ptr) {
40+
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
41+
<< dst_place;
42+
return;
43+
}
3944
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
4045
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
4146
}
@@ -71,6 +76,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
7176
auto stream =
7277
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
7378
if (platform::is_same_place(src_place, dst_place)) {
79+
if (src_ptr == dst_ptr) {
80+
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
81+
<< dst_place;
82+
return;
83+
}
7484
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
7585
stream);
7686
} else {
@@ -114,6 +124,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
114124
auto dst_ptr = dst->mutable_data(dst_place, src.type());
115125
auto size = src.numel() * SizeOfType(src.type());
116126
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
127+
if (src_ptr == dst_ptr) {
128+
VLOG(3) << "Skip copy the same data from " << src_place << " to "
129+
<< dst_place;
130+
return;
131+
}
117132
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
118133
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
119134
}
@@ -130,6 +145,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
130145
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
131146
} else if (platform::is_gpu_place(src_place) &&
132147
platform::is_gpu_place(dst_place)) {
148+
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
149+
VLOG(3) << "Skip copy the same data from " << src_place << " to "
150+
<< dst_place;
151+
return;
152+
}
133153
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
134154
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
135155
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);

paddle/fluid/framework/tensor_util_test.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ TEST(TensorCopy, Tensor) {
4141
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
4242
}
4343

44+
TensorCopy(dst_tensor, *cpu_place, &dst_tensor);
45+
for (size_t i = 0; i < 9; ++i) {
46+
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
47+
}
48+
4449
EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
4550

4651
Tensor slice_tensor = src_tensor.Slice(1, 2);
@@ -82,6 +87,15 @@ TEST(TensorCopy, Tensor) {
8287
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
8388
}
8489

90+
// Copy the same tensor
91+
TensorCopy(gpu_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
92+
gpu_ctx.Wait();
93+
const int* dst_ptr_tmp = dst_tensor.data<int>();
94+
EXPECT_NE(src_ptr, dst_ptr_tmp);
95+
for (size_t i = 0; i < 9; ++i) {
96+
EXPECT_EQ(src_ptr[i], dst_ptr_tmp[i]);
97+
}
98+
8599
Tensor slice_tensor = src_tensor.Slice(1, 2);
86100

87101
// CPU Slice Tensor to GPU Tensor

paddle/fluid/operators/reshape_op.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ dimension value will be copied from Input(X) at runtime. Note that the index of
164164
[2, 3, 4], Attr(shape) = [2, 3, 2, 0] is an invalid input.
165165
166166
3. Input(Shape) has a higher priority than Attr(shape) if it is provided, while
167-
Attr(shape) still should be set correctly to gurantee shape inference in
167+
Attr(shape) still should be set correctly to gurantee shape inference in
168168
compile-time.
169169
170170
)DOC");
@@ -259,7 +259,6 @@ class Reshape2Op : public ReshapeOp {
259259
: ReshapeOp(type, inputs, outputs, attrs) {}
260260

261261
void InferShape(framework::InferShapeContext *ctx) const override {
262-
ReshapeOp::InferShape(ctx);
263262
PADDLE_ENFORCE(ctx->HasOutput("XShape"),
264263
"Output(XShape) of ReshapeOp should not be null.");
265264
const auto &x_dims = ctx->GetInputDim("X");
@@ -270,6 +269,8 @@ class Reshape2Op : public ReshapeOp {
270269
}
271270
ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
272271
ctx->ShareLoD("X", /*->*/ "XShape");
272+
273+
ReshapeOp::InferShape(ctx);
273274
}
274275
};
275276

paddle/fluid/operators/sequence_concat_op.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,13 @@ REGISTER_OPERATOR(sequence_concat, paddle::framework::OperatorWithKernel,
9090
paddle::framework::DefaultGradOpDescMaker<false>);
9191
template <typename T>
9292
using Kernel = op::SeqConcatKernel<paddle::platform::CPUDeviceContext, T>;
93-
REGISTER_OP_CPU_KERNEL(sequence_concat, Kernel<float>, Kernel<double>);
93+
REGISTER_OP_CPU_KERNEL(sequence_concat, Kernel<float>, Kernel<double>,
94+
Kernel<int64_t>);
95+
9496
REGISTER_OPERATOR(sequence_concat_grad, paddle::framework::OperatorWithKernel,
9597
op::SeqConcatGradShapeInferer);
9698
template <typename T>
9799
using GradKernel =
98100
op::SeqConcatGradKernel<paddle::platform::CPUDeviceContext, T>;
99101
REGISTER_OP_CPU_KERNEL(sequence_concat_grad, GradKernel<float>,
100-
GradKernel<double>);
102+
GradKernel<double>, GradKernel<int64_t>);

0 commit comments

Comments
 (0)