Commit 15913d9

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into overlap_memcpy_with_dist

2 parents: e533a4b + ddd9502

40 files changed: +669 −207 lines

benchmark/fluid/README.md
Lines changed: 2 additions & 0 deletions

@@ -29,9 +29,11 @@ Currently supported `--model` argument include:
 You can choose to use GPU/CPU training. With GPU training, you can specify
 `--gpus <gpu_num>` to run multi GPU training.
 * Run distributed training with parameter servers:
+  * see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
   * start parameter servers:
     ```bash
     PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
+    sleep 15
     ```
   * start trainers:
     ```bash
benchmark/fluid/run_fluid_benchmark.sh
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device CPU --update_method pserver --iterations=10000 &
+
+sleep 15
+
+CUDA_VISIBLE_DEVICES=0,1 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &
+
+CUDA_VISIBLE_DEVICES=2,3 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=1 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &

paddle/fluid/framework/scope.cc
Lines changed: 17 additions & 13 deletions

@@ -34,13 +34,7 @@ DEFINE_bool(
 namespace paddle {
 namespace framework {
 
-Scope::~Scope() {
-  DropKids();
-  for (auto& kv : vars_) {
-    VLOG(3) << "Destroy variable " << kv.first;
-    delete kv.second;
-  }
-}
+Scope::~Scope() { DropKids(); }
 
 Scope& Scope::NewScope() const {
   std::unique_lock<std::mutex> lock(mutex_);
@@ -49,10 +43,13 @@ Scope& Scope::NewScope() const {
 }
 
 Variable* Scope::Var(const std::string& name) {
+  // Acquire the lock when creating a new variable under this scope.
+  std::unique_lock<std::mutex> lock(mutex_);
   auto* v = FindVarLocally(name);
   if (v != nullptr) return v;
+
   v = new Variable();
-  vars_[name] = v;
+  vars_[name].reset(v);
   VLOG(3) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
@@ -67,22 +64,29 @@ Variable* Scope::Var(std::string* name) {
 }
 
 Variable* Scope::FindVar(const std::string& name) const {
+  // Acquire the lock when looking up a variable.
+  std::unique_lock<std::mutex> lock(mutex_);
+  return FindVarInternal(name);
+}
+
+Variable* Scope::FindVarInternal(const std::string& name) const {
   auto var = FindVarLocally(name);
   if (var != nullptr) {
     return var;
   }
-  return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
+  return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
 }
 
 const Scope* Scope::FindScope(const Variable* var) const {
   for (auto& kv : vars_) {
-    if (kv.second == var) {
+    if (kv.second.get() == var) {
       return this;
     }
   }
   return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
 }
 void Scope::DropKids() {
+  std::unique_lock<std::mutex> lock(mutex_);
   for (Scope* s : kids_) delete s;
   kids_.clear();
 }
@@ -110,10 +114,10 @@ void Scope::DeleteScope(Scope* scope) const {
 }
 
 void Scope::EraseVars(const std::vector<std::string>& var_names) {
+  std::unique_lock<std::mutex> lock(mutex_);
   std::set<std::string> var_set(var_names.begin(), var_names.end());
   for (auto it = vars_.begin(); it != vars_.end();) {
     if (var_set.find(it->first) != var_set.end()) {
-      delete it->second;
       it = vars_.erase(it);
     } else {
       ++it;
@@ -129,7 +133,7 @@ void Scope::Rename(const std::string& origin_name,
   auto new_it = vars_.find(new_name);
   PADDLE_ENFORCE(new_it == vars_.end(),
                  "The variable with name %s is already in the scope", new_name);
-  vars_[new_name] = origin_it->second;
+  vars_[new_name].reset(origin_it->second.release());
   vars_.erase(origin_it);
 }
@@ -141,7 +145,7 @@ std::string Scope::Rename(const std::string& origin_name) const {
 
 Variable* Scope::FindVarLocally(const std::string& name) const {
   auto it = vars_.find(name);
-  if (it != vars_.end()) return it->second;
+  if (it != vars_.end()) return it->second.get();
   return nullptr;
 }
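Note: this diff does two things at once — it locks `mutex_` in the public entry points (`Var`, `FindVar`, `DropKids`, `EraseVars`) and moves variable ownership into `std::unique_ptr`, so the destructor and `EraseVars` no longer `delete` by hand. Below is a minimal, standalone sketch of the same pattern: a locked public wrapper delegating to an unlocked internal helper that can recurse through parents. The names (`Registry`, `Item`) are illustrative, not Paddle's.

```cpp
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

class Registry {
 public:
  struct Item {};

  Item* Find(const std::string& name) const {
    std::unique_lock<std::mutex> lock(mutex_);  // lock once at the boundary
    return FindInternal(name);                  // then recurse unlocked
  }

  Item* Create(const std::string& name) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (Item* it = FindLocally(name)) return it;
    auto owned = std::make_unique<Item>();
    Item* raw = owned.get();
    items_[name] = std::move(owned);  // the map owns the Item; callers borrow
    return raw;
  }

 private:
  // Unlocked helper: each node has its own mutex, and the recursion
  // deliberately skips the parent's lock, mirroring FindVarInternal above.
  Item* FindInternal(const std::string& name) const {
    if (Item* it = FindLocally(name)) return it;
    return (parent_ == nullptr) ? nullptr : parent_->FindInternal(name);
  }

  Item* FindLocally(const std::string& name) const {
    auto it = items_.find(name);
    return it == items_.end() ? nullptr : it->second.get();
  }

  mutable std::mutex mutex_;
  mutable std::unordered_map<std::string, std::unique_ptr<Item>> items_;
  const Registry* parent_{nullptr};
};
```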

paddle/fluid/framework/scope.h
Lines changed: 14 additions & 3 deletions

@@ -47,15 +47,18 @@ class Scope {
   Scope& NewScope() const;
 
   /// Create a variable with given name if it doesn't exist.
+  /// Caller doesn't own the returned Variable.
   Variable* Var(const std::string& name);
 
   /// Create a variable with a scope-unique name.
+  /// Caller doesn't own the returned Variable.
   Variable* Var(std::string* name = nullptr);
 
   void EraseVars(const std::vector<std::string>& var_names);
 
   /// Find a variable in the scope or any of its ancestors. Returns
   /// nullptr if cannot find.
+  /// Caller doesn't own the returned Variable.
   Variable* FindVar(const std::string& name) const;
 
   const Scope* parent() const { return parent_; }
@@ -78,13 +81,21 @@ class Scope {
   // Rename variable to a new name and return the new name
   std::string Rename(const std::string& origin_name) const;
 
-  Variable* FindVarLocally(const std::string& name) const;
-
  private:
   // Call Scope::NewScope for a sub-scope.
   explicit Scope(Scope const* parent) : parent_(parent) {}
 
-  mutable std::unordered_map<std::string, Variable*> vars_;
+  // Called by FindVar recursively.
+  // Caller doesn't own the returned Variable.
+  Variable* FindVarInternal(const std::string& name) const;
+
+  // Called by FindVarInternal and Var.
+  // Caller doesn't own the returned Variable.
+  Variable* FindVarLocally(const std::string& name) const;
+
+  mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+
+  // Scopes in `kids_` are owned by this class.
   mutable std::list<Scope*> kids_;
   Scope const* parent_{nullptr};
paddle/fluid/inference/analysis/helper.h
Lines changed: 9 additions & 0 deletions

@@ -18,6 +18,8 @@ limitations under the License. */
 #include <unordered_map>
 #include <vector>
 
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -107,6 +109,13 @@ class OrderedRegistry {
  std::vector<std::unique_ptr<T>> data_;
 };
 
+template <typename T>
+T &GetFromScope(const framework::Scope &scope, const std::string &name) {
+  framework::Variable *var = scope.FindVar(name);
+  PADDLE_ENFORCE(var != nullptr);
+  return *var->GetMutable<T>();
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Lines changed: 7 additions & 1 deletion

@@ -1,10 +1,16 @@
 # Add TRT tests
-nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine)
 # This test is not stable
 # See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
 #nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
 #        DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
 #        SERIAL)
+nv_library(tensorrt_converter
+  SRCS mul_op.cc conv2d_op.cc fc_op.cc
+  DEPS tensorrt_engine mul_op)
+
+nv_test(test_op_converter SRCS test_op_converter.cc DEPS
+  ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_converter)
+
 nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
 nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)

paddle/fluid/inference/tensorrt/convert/activation_op.cc
Lines changed: 3 additions & 2 deletions

@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 
 namespace paddle {
@@ -36,8 +37,8 @@ class ReluOpConverter : public OpConverter {
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
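Note: this file and the converters below all move `REGISTER_TRT_OP_CONVERTER` from inside the `paddle::inference::tensorrt` namespaces to global scope. The real macro lives in op_converter.h, which this commit view does not show; the sketch below is NOT its actual expansion, only a model of the usual self-registering-static pattern, which suggests why the move matters: a registrar object expanded inside a namespace gets a namespace-qualified name that companion `USE_*`-style macros in other translation units cannot reference consistently.

```cpp
#include <functional>
#include <map>
#include <string>

// Global registry keyed by op type; a Meyers singleton avoids static-init
// ordering problems.
std::map<std::string, std::function<void()>>& ConverterMap() {
  static std::map<std::string, std::function<void()>> m;
  return m;
}

// Hypothetical registration macro: defines a struct whose constructor
// inserts the factory, plus one static instance that runs at load time.
#define REGISTER_CONVERTER_SKETCH(op_type, factory)                \
  struct Registrar_##op_type {                                     \
    Registrar_##op_type() { ConverterMap()[#op_type] = factory; }  \
  };                                                               \
  static Registrar_##op_type registrar_##op_type##_instance;

// At global scope, the registrar's symbol is predictable. Expanded inside
// namespace paddle::inference::tensorrt, it would be buried there instead.
REGISTER_CONVERTER_SKETCH(relu, [] { /* build a Relu converter */ })
```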

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
Lines changed: 3 additions & 3 deletions

@@ -22,14 +22,14 @@ class Conv2dOpConverter : public OpConverter {
  public:
   Conv2dOpConverter() {}
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     LOG(INFO)
         << "convert a fluid conv2d op to tensorrt conv layer without bias";
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);

paddle/fluid/inference/tensorrt/convert/fc_op.cc
Lines changed: 6 additions & 4 deletions

@@ -56,7 +56,7 @@ void ReorderCKtoKC(TensorRTEngine::Weight& iweights,
 class FcOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
@@ -106,14 +106,16 @@ class FcOpConverter : public OpConverter {
                                    n_output, weight.get(), bias.get());
 
     auto output_name = op_desc.Output("Out").front();
-    engine_->DeclareOutput(layer, 0, output_name);
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {
+      engine_->DeclareOutput(output_name);
+    }
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(fc, FcOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
 
+REGISTER_TRT_OP_CONVERTER(fc, FcOpConverter);
 USE_OP(mul);

paddle/fluid/inference/tensorrt/convert/mul_op.cc
Lines changed: 10 additions & 5 deletions

@@ -23,9 +23,8 @@ namespace tensorrt {
  */
 class MulOpConverter : public OpConverter {
  public:
-  MulOpConverter() {}
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
@@ -37,12 +36,18 @@ class MulOpConverter : public OpConverter {
         engine_, MatrixMultiply, *const_cast<nvinfer1::ITensor*>(input1), false,
         *const_cast<nvinfer1::ITensor*>(input2), false);
 
-    engine_->DeclareOutput(layer, 0, op_desc.Output("Out")[0]);
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {  // The test framework cannot determine which tensor is
+                      // the output, so place the declaration inside.
+      engine_->DeclareOutput(output_name);
+    }
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(mul, MulOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+USE_OP(mul);
+REGISTER_TRT_OP_CONVERTER(mul, MulOpConverter);
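Note: the fc and mul converters now follow one convention — always record the produced tensor via `SetITensor`, and only promote it to a network output via `DeclareOutput` when `test_mode` is set, since the unit-test harness cannot tell which tensor is the output while production code declares outputs itself. A toy, runnable model of that convention; `Engine` and `ConvertOp` here are illustrative stand-ins, not Paddle classes:

```cpp
#include <iostream>
#include <string>

// Toy engine modeling TensorRTEngine's two calls used above.
struct Engine {
  void SetITensor(const std::string& name) {
    std::cout << "record tensor " << name << "\n";
  }
  void DeclareOutput(const std::string& name) {
    std::cout << "declare network output " << name << "\n";
  }
};

void ConvertOp(Engine* engine, const std::string& output_name, bool test_mode) {
  // Always make the produced tensor visible to later converters.
  engine->SetITensor(output_name);
  // Only the test harness needs the converter to pick the network output;
  // in production the surrounding code decides what the outputs are.
  if (test_mode) engine->DeclareOutput(output_name);
}

int main() {
  Engine engine;
  ConvertOp(&engine, "mul_out", /*test_mode=*/true);
}
```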
