Skip to content

Commit 8edf60c

Browse files
author
Yibing Liu
committed
Merge branch 'develop' of upstream into fix_seq_pad
2 parents ce773ed + 437debf commit 8edf60c

File tree

16 files changed

+269
-91
lines changed

16 files changed

+269
-91
lines changed

cmake/tensorrt.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
1616
DOC "Path to TensorRT library.")
1717

1818
if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
19+
# Mark TensorRT as found only when WITH_DSO is set.
if(WITH_DSO)
1920
set(TENSORRT_FOUND ON)
21+
endif(WITH_DSO)
2022
else()
2123
set(TENSORRT_FOUND OFF)
2224
endif()

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ struct LSTM : public PatternBase {
429429

430430
struct GRU : public PatternBase {
431431
GRU(PDPattern* pattern, const std::string& name_scope)
432-
: PatternBase(pattern, name_scope, "lstm") {}
432+
: PatternBase(pattern, name_scope, "gru") {}
433433

434434
PDNode* operator()(PDNode* x);
435435

paddle/fluid/inference/api/api.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
99
See the License for the specific language governing permissions and
1010
limitations under the License. */
1111

12-
#include <glog/logging.h>
1312
#include "paddle/fluid/inference/api/paddle_inference_api.h"
13+
#include "paddle/fluid/platform/enforce.h"
1414

1515
namespace paddle {
1616

@@ -64,13 +64,15 @@ PaddleBuf& PaddleBuf::operator=(PaddleBuf&& other) {
6464

6565
void PaddleBuf::Resize(size_t length) {
6666
// Only the owned memory can be reset, the external memory can't be changed.
67-
if (length_ == length) return;
67+
if (length_ >= length) return;
6868
if (memory_owned_) {
6969
Free();
70+
data_ = malloc(length);
71+
length_ = length;
72+
memory_owned_ = true;
73+
} else {
74+
PADDLE_THROW("The memory is allocated externally, can not Resized");
7075
}
71-
data_ = new char[length];
72-
length_ = length;
73-
memory_owned_ = true;
7476
}
7577

7678
void PaddleBuf::Reset(void* data, size_t length) {
@@ -82,8 +84,8 @@ void PaddleBuf::Reset(void* data, size_t length) {
8284

8385
void PaddleBuf::Free() {
8486
if (memory_owned_ && data_) {
85-
assert(length_ > 0);
86-
delete[] static_cast<char*>(data_);
87+
PADDLE_ENFORCE_GT(length_, 0);
88+
free(static_cast<char*>(data_));
8789
data_ = nullptr;
8890
length_ = 0;
8991
}

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
5353
download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
5454
inference_analysis_test(test_analyzer_text_classification SRCS analyzer_text_classification_tester.cc
5555
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
56-
ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
56+
ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/model
5757
--infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt)
5858

5959
# ocr

paddle/fluid/operators/conv_mkldnn_op.cc

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
300300
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
301301
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
302302
bool fuse_relu = ctx.Attr<bool>("fuse_relu");
303+
bool fuse_eltwise = ctx.Attr<bool>("fuse_eltwise");
303304
int groups = ctx.Attr<int>("groups");
304305

305306
// TODO: add support for dilation
@@ -366,12 +367,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
366367
bias_tz = paddle::framework::vectorize2int(bias->dims());
367368
auto bias_md = platform::MKLDNNMemDesc(
368369
bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
369-
conv_pd =
370-
ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, strides,
371-
paddings, mkldnn_engine, fuse_relu);
370+
conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
371+
strides, paddings, mkldnn_engine,
372+
fuse_relu, fuse_eltwise);
372373
} else {
373-
conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
374-
paddings, mkldnn_engine, fuse_relu);
374+
conv_pd =
375+
ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
376+
mkldnn_engine, fuse_relu, fuse_eltwise);
375377
}
376378
// Save conv_pd/src_memory/weights_memory for backward pass
377379
dev_ctx.SetBlob(key_conv_pd, conv_pd);
@@ -421,16 +423,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
421423
}
422424

423425
private:
424-
mkldnn::primitive_attr AddRelu() const {
425-
// Fusion with ReLU layer is executed through the PostOps feature. Create a
426-
// PostOps object and configure it to execute an eltwise relu operation.
426+
mkldnn::primitive_attr CreatePostOps(bool fuse_relu,
427+
bool fuse_eltwise) const {
427428
mkldnn::primitive_attr conv_attr;
428-
constexpr float scale = 1.0f;
429-
constexpr float negative_slope = 0.0f;
430-
constexpr float placeholder = 0.0f;
431429
mkldnn::post_ops post_operations;
432-
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
433-
negative_slope, placeholder);
430+
// Fusion with Elementwise layer relies on adding a sum post-operation with
431+
// the scale parameter. It is assumed that when fuse_eltwise is true, the
432+
// Output tensor contains the data coming from residual connection. The
433+
// result of this post_op is: Output = scale * Output + Conv_Out.
434+
if (fuse_eltwise) {
435+
post_operations.append_sum(1.0f);
436+
}
437+
// Fusion with ReLU layer is executed through the PostOps feature. Create a
438+
// PostOps object and configure it to execute an eltwise relu operation.
439+
if (fuse_relu) {
440+
constexpr float scale = 1.0f;
441+
constexpr float negative_slope = 0.0f;
442+
constexpr float placeholder = 0.0f;
443+
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
444+
negative_slope, placeholder);
445+
}
434446
conv_attr.set_post_ops(post_operations);
435447
return conv_attr;
436448
}
@@ -439,8 +451,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
439451
ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
440452
const memory::desc& dst, const std::vector<int>& strides,
441453
const std::vector<int>& paddings,
442-
const mkldnn::engine& engine,
443-
const bool fuse_relu) const {
454+
const mkldnn::engine& engine, const bool fuse_relu,
455+
const bool fuse_eltwise) const {
444456
memory::dims stride_dims = {strides[0], strides[1]};
445457
memory::dims padding_dims = {paddings[0], paddings[1]};
446458

@@ -449,10 +461,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
449461
dst, stride_dims, padding_dims, padding_dims,
450462
mkldnn::padding_kind::zero);
451463

452-
mkldnn::primitive_attr conv_attr;
453-
if (fuse_relu) {
454-
conv_attr = AddRelu();
455-
}
464+
mkldnn::primitive_attr conv_attr = CreatePostOps(fuse_relu, fuse_eltwise);
456465

457466
auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
458467
conv_desc, conv_attr, engine);
@@ -466,8 +475,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
466475
const memory::desc& bias, const memory::desc& dst,
467476
const std::vector<int>& strides,
468477
const std::vector<int>& paddings,
469-
const mkldnn::engine& engine,
470-
const bool fuse_relu) const {
478+
const mkldnn::engine& engine, const bool fuse_relu,
479+
const bool fuse_eltwise) const {
471480
memory::dims stride_dims = {strides[0], strides[1]};
472481
memory::dims padding_dims = {paddings[0], paddings[1]};
473482

@@ -476,10 +485,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
476485
bias, dst, stride_dims, padding_dims, padding_dims,
477486
mkldnn::padding_kind::zero);
478487

479-
mkldnn::primitive_attr conv_attr;
480-
if (fuse_relu) {
481-
conv_attr = AddRelu();
482-
}
488+
mkldnn::primitive_attr conv_attr = CreatePostOps(fuse_relu, fuse_eltwise);
483489

484490
auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
485491
conv_desc, conv_attr, engine);

paddle/fluid/operators/conv_op.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ void Conv2DOpMaker::Make() {
164164
.SetDefault(false);
165165
AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
166166
.SetDefault(false);
167+
AddAttr<bool>("fuse_eltwise",
168+
"(bool, default false) Only used in mkldnn kernel. Used "
169+
"whenever convolution output is connected via skip connection "
170+
"to a previous layer.")
171+
.SetDefault(false);
167172
AddAttr<std::string>(
168173
"data_format",
169174
"(string, default NCHW) Only used in "

paddle/fluid/operators/distributed/grpc_client.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
125125
VarHandlePtr h(new VarHandle(ep, "Get", var_name_val, p_ctx, p_scope));
126126
s->Prepare(h, time_out);
127127

128-
framework::AsyncIO([var_name_val, p_scope, p_ctx, s, this] {
128+
framework::AsyncIO([var_name_val, s, this] {
129129
// prepare input
130130
sendrecv::VariableMessage req;
131131
req.set_varname(var_name_val);
@@ -166,7 +166,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
166166
s->Prepare(h, time_out);
167167

168168
framework::AsyncIO([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx,
169-
time_out, s, this] {
169+
s, this] {
170170
auto* var = p_scope->FindVar(in_var_name_val);
171171

172172
::grpc::ByteBuffer req;

paddle/fluid/operators/distributed/proto_encoder_helper.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,10 @@ class ProtoEncodeHelper {
8282
: base_(buf), p_(buf), limit_(base_ + max_size) {}
8383

8484
~ProtoEncodeHelper() {
85+
#define REPLACE_ENFORCE_GLOG 1
8586
// Make sure callers didn't perform operations that went past the promised max_size
86-
PADDLE_ENFORCE_LE(p_, limit_);
87+
paddle::platform::throw_on_error(p_ <= limit_);
88+
#undef REPLACE_ENFORCE_GLOG
8789
}
8890

8991
const char* data() const { return base_; }

paddle/fluid/operators/listen_and_serv_op.cc

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,16 @@ static void ParallelExecuteBlocks(
5959
framework::ProgramDesc *program, framework::Scope *scope) {
6060
std::vector<std::future<void>> fs;
6161
for (size_t idx : parallel_blkids) {
62-
fs.push_back(
63-
framework::Async([&executor, &prepared, &program, &scope, idx]() {
64-
int run_block = idx; // thread local
65-
try {
66-
VLOG(3) << "running server block: " << run_block
67-
<< "pointer: " << prepared[run_block].get();
68-
executor->RunPreparedContext(prepared[run_block].get(), scope);
69-
} catch (const std::exception &e) {
70-
LOG(ERROR) << "run sub program error " << e.what();
71-
}
72-
}));
62+
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
63+
int run_block = idx; // thread local
64+
try {
65+
VLOG(3) << "running server block: " << run_block
66+
<< "pointer: " << prepared[run_block].get();
67+
executor->RunPreparedContext(prepared[run_block].get(), scope);
68+
} catch (const std::exception &e) {
69+
LOG(ERROR) << "run sub program error " << e.what();
70+
}
71+
}));
7372
}
7473
for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();
7574
}

paddle/fluid/operators/math/sequence_pooling.cc

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,58 @@ class MaxSeqPoolGradFunctor {
103103
}
104104
};
105105

106+
template <typename T>
107+
class LastSeqPoolFunctor {
108+
public:
109+
void operator()(const platform::CPUDeviceContext& context,
110+
const framework::LoDTensor& input,
111+
framework::Tensor* output) {
112+
// Create pointers to input and output data
113+
auto* in_data = input.data<T>();
114+
auto* out_data = output->data<T>();
115+
116+
// Calculate the size of each item in the sequence
117+
int64_t item_size = input.numel() / input.dims()[0];
118+
auto lod = input.lod()[0];
119+
int seq_num = static_cast<int>(lod.size()) - 1;
120+
for (int i = 0; i < seq_num; ++i) {
121+
// Calculate the length of each sequence
122+
int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
123+
// Point to the beginning of the next sequence
124+
in_data += seq_len * item_size;
125+
// Copy the last item of the sequence to the output
126+
std::memcpy(out_data, (in_data - item_size), item_size * sizeof(T));
127+
out_data += item_size;
128+
}
129+
}
130+
};
131+
132+
template <typename T>
133+
class FirstSeqPoolFunctor {
134+
public:
135+
void operator()(const platform::CPUDeviceContext& context,
136+
const framework::LoDTensor& input,
137+
framework::Tensor* output) {
138+
// Create pointers to input and output data
139+
auto* in_data = input.data<T>();
140+
auto* out_data = output->data<T>();
141+
142+
// Calculate the size of each item in the sequence
143+
int64_t item_size = input.numel() / input.dims()[0];
144+
auto lod = input.lod()[0];
145+
int seq_num = static_cast<int>(lod.size()) - 1;
146+
for (int i = 0; i < seq_num; ++i) {
147+
// Calculate the length of each sequence
148+
int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
149+
// Copy the first item of the sequence to the output
150+
std::memcpy(out_data, in_data, item_size * sizeof(T));
151+
// Point to the next sequence
152+
in_data += seq_len * item_size;
153+
out_data += item_size;
154+
}
155+
}
156+
};
157+
106158
template <typename T>
107159
class SequencePoolFunctor<platform::CPUDeviceContext, T> {
108160
public:
@@ -116,6 +168,16 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
116168
max_pool(context, input, output, index);
117169
return;
118170
}
171+
if (pooltype == "LAST") {
172+
math::LastSeqPoolFunctor<T> last_pool;
173+
last_pool(context, input, output);
174+
return;
175+
}
176+
if (pooltype == "FIRST") {
177+
math::FirstSeqPoolFunctor<T> first_pool;
178+
first_pool(context, input, output);
179+
return;
180+
}
119181
auto lod = input.lod()[0];
120182
auto& place = *context.eigen_device();
121183
for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
@@ -133,10 +195,6 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
133195
} else if (pooltype == "SQRT") {
134196
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
135197
std::sqrt(static_cast<T>(h));
136-
} else if (pooltype == "LAST") {
137-
out_e.device(place) = in_e.chip(h - 1, 0);
138-
} else if (pooltype == "FIRST") {
139-
out_e.device(place) = in_e.chip(0, 0);
140198
} else {
141199
PADDLE_THROW("unsupported pooling pooltype");
142200
}

0 commit comments

Comments
 (0)