
Commit 1992f70

Merge branch 'develop' into refine_relu_test

2 parents be41c2f + dfdcb7e · commit 1992f70

31 files changed: +594 −83 lines

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -25,7 +25,6 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
         "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 
-find_package(Sphinx)
 if(NOT CMAKE_CROSSCOMPILING)
   find_package(CUDA QUIET)
 endif(NOT CMAKE_CROSSCOMPILING)
@@ -226,5 +225,7 @@ if(WITH_PYTHON)
 endif()
 
 if(WITH_DOC)
+  find_package(Sphinx REQUIRED)
+  find_python_module(recommonmark REQUIRED)
   add_subdirectory(doc)
 endif()

paddle/fluid/framework/operator.h

Lines changed: 4 additions & 0 deletions
@@ -192,6 +192,10 @@ class ExecutionContext {
     return op_.Attr<T>(name);
   }
 
+  bool HasInput(const std::string& name) const { return op_.HasInputs(name); }
+
+  bool HasOutput(const std::string& name) const { return op_.HasOutputs(name); }
+
   size_t InputSize(const std::string& name) const {
     return op_.Inputs(name).size();
   }
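
Note: a minimal usage sketch of the new helpers, for context only. The kernel class and the "Bias"/"SideOut" slots below are hypothetical; the OpKernel/ExecutionContext interfaces are assumed from the framework headers.

// Sketch only: guarding an optional input/output with the new
// ExecutionContext::HasInput / HasOutput helpers.
#include "paddle/fluid/framework/operator.h"

template <typename T>
class OptionalBiasKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    const paddle::framework::Tensor* bias = nullptr;
    if (ctx.HasInput("Bias")) {  // delegates to op_.HasInputs("Bias")
      bias = ctx.Input<paddle::framework::Tensor>("Bias");
    }
    if (ctx.HasOutput("SideOut")) {
      // write the optional output only when the op declared it
    }
    (void)bias;  // placeholder: a real kernel would use bias in its math
  }
};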

paddle/fluid/framework/parallel_executor.cc

Lines changed: 9 additions & 2 deletions
@@ -58,7 +58,8 @@ ParallelExecutor::ParallelExecutor(
     const std::unordered_set<std::string> &bcast_vars,
     const ProgramDesc &main_program, const std::string &loss_var_name,
     Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay,
-    bool use_default_grad_scale, bool balance_parameter_opt_between_cards)
+    bool use_default_grad_scale, bool balance_parameter_opt_between_cards,
+    size_t num_trainers, size_t trainer_id)
     : member_(new ParallelExecutorPrivate(places)) {
   member_->global_scope_ = scope;
 
@@ -80,7 +81,13 @@ ParallelExecutor::ParallelExecutor(
 
   // Bcast Parameters to all GPUs
 #ifdef PADDLE_WITH_CUDA
-  member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_));
+  auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME);
+  ncclUniqueId *nccl_id = nullptr;
+  if (nccl_id_var != nullptr) {
+    nccl_id = nccl_id_var->GetMutable<ncclUniqueId>();
+  }
+  member_->nccl_ctxs_.reset(new platform::NCCLContextMap(
+      member_->places_, nccl_id, num_trainers, trainer_id));
 #endif
   if (platform::is_gpu_place(places[0]) && member_->local_scopes_.size() != 1 &&
       local_scopes.empty()) {  // Is CUDA
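
Note: with this change, a caller that wants the multi-trainer path would place an ncclUniqueId into the scope under NCCL_ID_VARNAME before constructing the ParallelExecutor, so the FindVar lookup above succeeds. A minimal sketch follows; the helper name is hypothetical, NCCL_ID_VARNAME and the Scope API are taken from the diff, the nccl_helper.h include is an assumption, and ncclGetUniqueId is the standard NCCL call.

// Sketch only: pre-populate the scope so that
// scope->FindVar(NCCL_ID_VARNAME) in the constructor returns a valid id.
#include <nccl.h>

#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/nccl_helper.h"  // assumed location of NCCL_ID_VARNAME

void PrepareNCCLIdVar(paddle::framework::Scope* scope) {
  // Create the variable the constructor looks for and fill it with a fresh id.
  auto* nccl_id = scope->Var(NCCL_ID_VARNAME)->GetMutable<ncclUniqueId>();
  // Trainer 0 would generate the id and share it with the other trainers
  // out of band before they construct their ParallelExecutor instances.
  ncclGetUniqueId(nccl_id);
}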

paddle/fluid/framework/parallel_executor.h

Lines changed: 2 additions & 1 deletion
@@ -41,7 +41,8 @@ class ParallelExecutor {
                    const std::string& loss_var_name, Scope* scope,
                    const std::vector<Scope*>& local_scopes,
                    bool allow_op_delay, bool use_default_grad_scale,
-                   bool balance_parameter_opt_between_cards);
+                   bool balance_parameter_opt_between_cards,
+                   size_t num_trainers = 1, size_t trainer_id = 0);
 
   ~ParallelExecutor();

paddle/fluid/inference/tensorrt/convert/activation_op.cc

Lines changed: 6 additions & 3 deletions
@@ -21,15 +21,18 @@ namespace tensorrt {
 class ReluOpConverter : public OpConverter {
  public:
   ReluOpConverter() {}
-  void operator()(const framework::OpDesc& op) override {
+  void operator()(const framework::proto::OpDesc& op) override {
+    // The two nullptrs look strange; that is because the framework::OpDesc
+    // constructor requires them.
+    framework::OpDesc op_desc(op, nullptr, nullptr);
     LOG(INFO) << "convert a fluid relu op to tensorrt activation layer whose "
                  "type is Relu";
     const nvinfer1::ITensor* input_tensor =
-        engine_->GetITensor(op.Input("X")[0]);
+        engine_->GetITensor(op_desc.Input("X")[0]);
     nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
         engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
         nvinfer1::ActivationType::kRELU);
-    engine_->SetITensor(op.Output("Out")[0], layer->getOutput(0));
+    engine_->SetITensor(op_desc.Output("Out")[0], layer->getOutput(0));
   }
 };

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ namespace tensorrt {
 class Conv2dOpConverter : public OpConverter {
  public:
   Conv2dOpConverter() {}
-  void operator()(const framework::OpDesc& op) override {
+  void operator()(const framework::proto::OpDesc& op) override {
     LOG(INFO)
         << "convert a fluid conv2d op to tensorrt conv layer without bias";
   }

paddle/fluid/inference/tensorrt/convert/io_converter.cc

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ class DefaultIOConverter : public EngineIOConverter {
                                            cudaMemcpyHostToDevice, *stream_));
     } else if (is_gpu_place(place)) {
       PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
-                                           cudaMemcpyHostToHost, *stream_));
+                                           cudaMemcpyDeviceToDevice, *stream_));
     } else {
       PADDLE_THROW("Unknown device for converter");
     }

paddle/fluid/inference/tensorrt/convert/mul_op.cc

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ namespace tensorrt {
 class MulOpConverter : public OpConverter {
  public:
   MulOpConverter() {}
-  void operator()(const framework::OpDesc& op) override {
+  void operator()(const framework::proto::OpDesc& op) override {
     LOG(INFO) << "convert a fluid mul op to tensorrt fc layer without bias";
   }
 };

paddle/fluid/inference/tensorrt/convert/op_converter.h

Lines changed: 9 additions & 7 deletions
@@ -31,25 +31,27 @@ namespace tensorrt {
 class OpConverter {
  public:
   OpConverter() {}
-  virtual void operator()(const framework::OpDesc& op) {}
+  virtual void operator()(const framework::proto::OpDesc& op) {}
 
-  void Run(const framework::OpDesc& op, TensorRTEngine* engine) {
-    std::string type = op.Type();
+  void Run(const framework::proto::OpDesc& op, TensorRTEngine* engine) {
+    std::string type = op.type();
     auto* it = Registry<OpConverter>::Lookup(type);
     PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", type);
     it->SetEngine(engine);
     (*it)(op);
   }
 
   // convert fluid op to tensorrt layer
-  void ConvertOp(const framework::OpDesc& op, TensorRTEngine* engine) {
+  void ConvertOp(const framework::proto::OpDesc& op, TensorRTEngine* engine) {
     OpConverter::Run(op, engine);
   }
 
   // convert fluid block to tensorrt network
-  void ConvertBlock(const framework::BlockDesc& block, TensorRTEngine* engine) {
-    for (auto op : block.AllOps()) {
-      OpConverter::Run(*op, engine);
+  void ConvertBlock(const framework::proto::BlockDesc& block,
+                    TensorRTEngine* engine) {
+    for (size_t i = 0; i < block.ops_size(); i++) {
+      const auto& op = block.ops(i);
+      OpConverter::Run(op, engine);
     }
   }
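
Note: a hedged usage sketch of the updated ConvertBlock, for context. The driver function below is hypothetical, and it assumes framework::ProgramDesc::Proto() exposes the underlying proto::ProgramDesc as in the framework headers.

// Sketch only: converting the ops of a program's block 0 into a TensorRT
// network through the proto-based interface above.
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace trt = paddle::inference::tensorrt;

void ConvertMainBlock(paddle::framework::ProgramDesc* program,
                      trt::TensorRTEngine* engine) {
  trt::OpConverter converter;
  // Every op in block 0 is dispatched by its type string through
  // Registry<OpConverter>, just as Run() does for a single proto::OpDesc.
  converter.ConvertBlock(program->Proto()->blocks(0), engine);
}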

paddle/fluid/inference/tensorrt/convert/test_activation_op.cc

Lines changed: 3 additions & 2 deletions
@@ -51,7 +51,7 @@ void Compare(const std::string op_type, float input, float expect) {
   op_desc.SetInput("X", {"X"});
   op_desc.SetOutput("Out", {"Out"});
 
-  auto op = framework::OpRegistry::CreateOp(op_desc);
+  auto op = framework::OpRegistry::CreateOp(*op_desc.Proto());
 
   // run fluid op
   op->Run(scope, place);
@@ -68,7 +68,8 @@ void Compare(const std::string op_type, float input, float expect) {
                                 nvinfer1::DimsCHW{1, 1, 1});
   // convert op
   OpConverter op_converter;
-  op_converter.ConvertOp(op_desc, engine);
+  op_converter.ConvertOp(*op_desc.Proto(), engine);
+
   engine->DeclareOutput("Out");
   engine->FreezeNetwork();
