Skip to content

Commit b708ec0

Browse files
authored
Merge pull request #10412 from JiayiFeng/correct_TensorCopy_misuse
Correct tensor copy misuse
2 parents 76b63c2 + 0c99cd7 commit b708ec0

File tree

5 files changed

+25
-30
lines changed

5 files changed

+25
-30
lines changed

paddle/fluid/operators/lod_reset_op.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
4646
auto* lod = lod_t->data<int>();
4747
if (platform::is_gpu_place(ctx.GetPlace())) {
4848
framework::Tensor lod_cpu;
49-
framework::TensorCopy(*lod_t, platform::CPUPlace(),
50-
ctx.device_context(), &lod_cpu);
49+
framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
5150
lod = lod_cpu.data<int>();
5251
}
5352
level0 = std::vector<int>(lod, lod + lod_t->numel());

paddle/fluid/operators/math/concat_test.cc

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ void testConcat() {
6969
}
7070

7171
if (paddle::platform::is_gpu_place(Place())) {
72-
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
73-
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
72+
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
73+
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
7474
}
7575

7676
std::vector<paddle::framework::Tensor> input;
@@ -86,8 +86,8 @@ void testConcat() {
8686

8787
int* out_ptr;
8888
if (paddle::platform::is_gpu_place(Place())) {
89-
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
90-
&out_cpu);
89+
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
90+
&out_cpu);
9191
out_ptr = out_cpu.data<int>();
9292
} else {
9393
out_ptr = out.data<int>();
@@ -142,8 +142,8 @@ void testConcat() {
142142
}
143143

144144
if (paddle::platform::is_gpu_place(Place())) {
145-
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
146-
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
145+
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
146+
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
147147
}
148148

149149
input.clear();
@@ -157,8 +157,8 @@ void testConcat() {
157157
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
158158

159159
if (paddle::platform::is_gpu_place(Place())) {
160-
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
161-
&out_cpu);
160+
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
161+
&out_cpu);
162162
out_ptr = out_cpu.data<int>();
163163
} else {
164164
out_ptr = out.data<int>();
@@ -215,8 +215,8 @@ void testConcat() {
215215
}
216216

217217
if (paddle::platform::is_gpu_place(Place())) {
218-
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
219-
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
218+
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
219+
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
220220
}
221221

222222
input.clear();
@@ -230,8 +230,8 @@ void testConcat() {
230230
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
231231

232232
if (paddle::platform::is_gpu_place(Place())) {
233-
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
234-
&out_cpu);
233+
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
234+
&out_cpu);
235235
out_ptr = out_cpu.data<int>();
236236
} else {
237237
out_ptr = out.data<int>();
@@ -290,8 +290,8 @@ void testConcat() {
290290
}
291291

292292
if (paddle::platform::is_gpu_place(Place())) {
293-
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
294-
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
293+
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
294+
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
295295
}
296296

297297
input.clear();
@@ -305,8 +305,8 @@ void testConcat() {
305305
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
306306

307307
if (paddle::platform::is_gpu_place(Place())) {
308-
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
309-
&out_cpu);
308+
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
309+
&out_cpu);
310310
out_ptr = out_cpu.data<int>();
311311
} else {
312312
out_ptr = out.data<int>();

paddle/fluid/operators/math/sequence_padding_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
4141
if (paddle::platform::is_cpu_place(*place)) {
4242
seq = cpu_seq;
4343
} else {
44-
TensorCopy(cpu_seq, *place, *context, &seq);
44+
TensorCopySync(cpu_seq, *place, &seq);
4545
seq.set_lod(lod);
4646
}
4747

@@ -64,7 +64,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
6464
if (paddle::platform::is_cpu_place(*place)) {
6565
cpu_seq_back = seq_back;
6666
} else {
67-
TensorCopy(seq_back, paddle::platform::CPUPlace(), *context, &cpu_seq_back);
67+
TensorCopySync(seq_back, paddle::platform::CPUPlace(), &cpu_seq_back);
6868
cpu_seq_back.set_lod(lod);
6969
}
7070

paddle/fluid/operators/multiplex_op.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
3333
auto cols = ins[0]->numel() / rows;
3434
// copy index to cpu
3535
Tensor index_t_cpu;
36-
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
36+
TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
3737
auto* index = index_t_cpu.data<int32_t>();
3838
auto stream = ctx.cuda_device_context().stream();
3939
platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
6969
auto cols = ins[0]->numel() / rows;
7070
// copy index to cpu
7171
Tensor index_t_cpu;
72-
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
72+
TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
7373
auto* index = index_t_cpu.data<int32_t>();
7474

7575
auto stream = ctx.cuda_device_context().stream();

paddle/fluid/operators/sequence_slice_op.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
6666

6767
if (platform::is_gpu_place(ctx.GetPlace())) {
6868
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
69-
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(),
70-
&offset_cpu);
69+
framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
7170
offset_data = offset_cpu.data<int64_t>();
7271

7372
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
74-
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(),
75-
&length_cpu);
73+
framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
7674
length_data = length_cpu.data<int64_t>();
7775
}
7876

@@ -127,13 +125,11 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
127125

128126
if (platform::is_gpu_place(ctx.GetPlace())) {
129127
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
130-
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(),
131-
&offset_cpu);
128+
framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
132129
offset_data = offset_cpu.data<int64_t>();
133130

134131
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
135-
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(),
136-
&length_cpu);
132+
framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
137133
length_data = length_cpu.data<int64_t>();
138134
}
139135

0 commit comments

Comments (0)