Skip to content

Commit c7153f8

Browse files
authored
Merge pull request #14662 from NHZlX/cherry_pick_sync_pass
cherry-pick from #14649
2 parents 25c2cda + afe829f commit c7153f8

File tree

9 files changed

+128
-16
lines changed

9 files changed

+128
-16
lines changed

paddle/fluid/inference/analysis/passes/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager)
22
cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager)
3-
cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass)
3+
cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
4+
cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass ir_params_sync_among_devices_pass)
45

56
set(analysis_deps ${analysis_deps}
67
ir_graph_build_pass

paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
6161
void IrAnalysisComposePass::ApplyIrPasses(Argument *argument) {
6262
std::vector<std::string> passes({
6363
"ir_graph_build_pass", "ir_analysis_pass",
64+
"ir_params_sync_among_devices_pass",
6465
});
6566
for (const auto &pass : passes) {
6667
VLOG(2) << "Run pass " << pass;

paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,7 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
3636
// so that the parameters will on the same device, or they will keep copying
3737
// between different devices.
3838
platform::Place place;
39-
if (argument->use_gpu()) {
40-
PADDLE_ENFORCE(argument->gpu_device_id_valid());
41-
place = platform::CUDAPlace(argument->gpu_device_id());
42-
} else {
43-
place = platform::CPUPlace();
44-
}
39+
place = platform::CPUPlace();
4540

4641
if (argument->model_dir_valid()) {
4742
auto program =
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
16+
#include "paddle/fluid/framework/data_layout.h"
17+
#include "paddle/fluid/framework/lod_tensor.h"
18+
#include "paddle/fluid/framework/tensor_util.h"
19+
#include "paddle/fluid/platform/enforce.h"
20+
21+
namespace paddle {
22+
namespace inference {
23+
namespace analysis {
24+
25+
// Copy every tensor parameter held in the analysis scope from host memory to
// the GPU selected in the argument, so that subsequent passes and the
// predictor see device-resident weights. A no-op for CPU inference.
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
  PADDLE_ENFORCE(argument->scope_valid());
  PADDLE_ENFORCE(argument->use_gpu_valid());

  // The parameters are on the cpu, therefore, synchronization is not necessary.
  if (!argument->use_gpu()) return;

  LOG(INFO) << "Sync params from CPU to GPU";

  PADDLE_ENFORCE(argument->gpu_device_id_valid());
  platform::Place place = platform::CUDAPlace(argument->gpu_device_id());

  auto *scope = argument->scope_ptr();

  // Enumerate variables from the local scope rather than the ProgramDesc:
  // analysis passes may have added parameter variables that the program
  // itself does not list.
  for (const auto &name : scope->LocalVarNames()) {
    auto *var = scope->FindLocalVar(name);
    PADDLE_ENFORCE(var != nullptr);
    if (!var->IsType<framework::LoDTensor>() &&
        !var->IsType<framework::Tensor>()) {
      continue;
    }
    // NOTE(review): assumes the variable actually holds a LoDTensor of float
    // data — confirm behavior for plain Tensor variables and for non-float
    // parameters.
    auto *tensor = var->GetMutable<framework::LoDTensor>();

    // Stage the current contents in a host-side buffer first, ...
    platform::CPUPlace host_place;
    framework::LoDTensor staging;
    staging.Resize(tensor->dims());
    staging.mutable_data<float>(host_place);
    TensorCopySync(*tensor, host_place, &staging);

    // ... then re-allocate the variable's storage on the GPU and copy the
    // staged data into the new device allocation.
    tensor->mutable_data<float>(place);
    TensorCopySync(staging, place, tensor);
  }
}
67+
68+
// Human-readable identifier for this pass, used in logs and registries.
std::string IrParamsSyncAmongDevicesPass::repr() const {
  static const char kRepr[] = "ir-params-sync-among-devices-pass";
  return kRepr;
}
71+
72+
} // namespace analysis
73+
} // namespace inference
74+
} // namespace paddle
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <string>
18+
#include <vector>
19+
20+
#include "paddle/fluid/framework/scope.h"
21+
#include "paddle/fluid/inference/analysis/analysis_pass.h"
22+
#include "paddle/fluid/platform/place.h"
23+
24+
namespace paddle {
25+
namespace inference {
26+
namespace analysis {
27+
28+
/*
 * Sync parameter from CPU to GPU.
 */
class IrParamsSyncAmongDevicesPass : public AnalysisPass {
 public:
  // Copies the tensor parameters held in the argument's scope to the
  // configured GPU device; does nothing when GPU use is disabled.
  void RunImpl(Argument *argument) override;
  // Short human-readable name of this pass.
  std::string repr() const override;
};
36+
37+
} // namespace analysis
38+
} // namespace inference
39+
} // namespace paddle

paddle/fluid/inference/analysis/passes/passes.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc"
1717
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
1818
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
19+
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
1920

2021
namespace paddle {
2122
namespace inference {
@@ -27,6 +28,9 @@ PassRegistry::PassRegistry() {
2728
std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
2829
passes_.emplace("ir_analysis_compose_pass",
2930
std::unique_ptr<AnalysisPass>(new IrAnalysisComposePass));
31+
passes_.emplace(
32+
"ir_params_sync_among_devices_pass",
33+
std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
3034
}
3135

3236
} // namespace analysis

paddle/fluid/inference/api/paddle_pass_builder.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,8 @@ class CpuPassStrategy : public PassStrategy {
116116
class GpuPassStrategy : public PassStrategy {
117117
public:
118118
GpuPassStrategy() : PassStrategy({}) {
119-
// TODO(NHZlX) Problem with Data synchronization between GPU and CPU
120-
// When running in GPU mode, the parameters are all on GPU. But the
121-
// operations of "conv_bn_fuse_pass" are on CPU.
122119
passes_.assign({
123-
"infer_clean_graph_pass",
124-
// "infer_clean_graph_pass", "conv_bn_fuse_pass",
120+
"infer_clean_graph_pass", "conv_bn_fuse_pass",
125121
});
126122
}
127123

paddle/fluid/inference/utils/benchmark.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ std::string Benchmark::SerializeToString() const {
3333
ss << batch_size_ << "\t";
3434
ss << num_threads_ << "\t";
3535
ss << latency_ << "\t";
36-
ss << 1000 / latency_;
36+
ss << 1000.0 / latency_;
3737
ss << '\n';
3838
return ss.str();
3939
}

paddle/fluid/inference/utils/benchmark.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14+
#pragma once
1415

1516
#include <fstream>
1617
#include <iostream>
18+
#include <string>
1719

1820
namespace paddle {
1921
namespace inference {
@@ -31,8 +33,8 @@ struct Benchmark {
3133
bool use_gpu() const { return use_gpu_; }
3234
void SetUseGpu() { use_gpu_ = true; }
3335

34-
int latency() const { return latency_; }
35-
void SetLatency(int x) { latency_ = x; }
36+
float latency() const { return latency_; }
37+
void SetLatency(float x) { latency_ = x; }
3638

3739
const std::string& name() const { return name_; }
3840
void SetName(const std::string& name) { name_ = name; }
@@ -43,7 +45,7 @@ struct Benchmark {
4345
private:
4446
bool use_gpu_{false};
4547
int batch_size_{0};
46-
int latency_;
48+
float latency_;
4749
int num_threads_{1};
4850
std::string name_;
4951
};

0 commit comments

Comments
 (0)