Skip to content

Commit 41eeb77

Browse files
committed
Merge branch 'develop' into clean_inference_lib
2 parents a35e7f4 + 6447155 commit 41eeb77

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3015
-447
lines changed

paddle/fluid/API.spec

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'outp
7575
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
7676
paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
7777
paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
78-
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,))
78+
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
79+
paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
7980
paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
8081
paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
8182
paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
@@ -84,6 +85,7 @@ paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name']
8485
paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
8586
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8687
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
88+
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
8789
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
8890
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
8991
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))

paddle/fluid/framework/executor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
101101
} else if (var_type == proto::VarType::FETCH_LIST) {
102102
var->GetMutable<FeedFetchList>();
103103
} else if (var_type == proto::VarType::STEP_SCOPES) {
104-
var->GetMutable<std::vector<framework::Scope>>();
104+
var->GetMutable<std::vector<framework::Scope*>>();
105105
} else if (var_type == proto::VarType::LOD_RANK_TABLE) {
106106
var->GetMutable<LoDRankTable>();
107107
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {

paddle/fluid/framework/feed_fetch_method.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
2727
// be created.
2828
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
2929
Variable* g_feed_value = scope->Var(var_name);
30-
auto& feed_inputs =
31-
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
30+
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
3231
if (index >= feed_inputs.size()) {
3332
feed_inputs.resize(index + 1);
3433
}

paddle/fluid/framework/naive_executor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
3737
} else if (var_type == proto::VarType::FETCH_LIST) {
3838
var->GetMutable<FeedFetchList>();
3939
} else if (var_type == proto::VarType::STEP_SCOPES) {
40-
var->GetMutable<std::vector<framework::Scope>>();
40+
var->GetMutable<std::vector<framework::Scope *>>();
4141
} else if (var_type == proto::VarType::LOD_RANK_TABLE) {
4242
var->GetMutable<LoDRankTable>();
4343
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {

paddle/fluid/framework/operator.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,17 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
149149
platform::SetDeviceId(dev_id);
150150
#endif
151151
}
152-
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
153-
platform::RecordEvent record_event(Type(), pool.Get(place));
154-
RunImpl(scope, place);
152+
153+
// The profiler has a process-wide mutex, which results in a serious performance issue
154+
// in concurrency scenarios. Here use an `if` to fix this issue.
155+
// Please do not remove the `if`; ask @Superjomn if there are any concerns.
156+
if (platform::IsProfileEnabled()) {
157+
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
158+
platform::RecordEvent record_event(Type(), pool.Get(place));
159+
RunImpl(scope, place);
160+
} else {
161+
RunImpl(scope, place);
162+
}
155163
VLOG(3) << place << " " << DebugStringEx(&scope);
156164
}
157165

paddle/fluid/framework/tensor_util.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
3636
auto size = src.numel() * SizeOfType(src.type());
3737

3838
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
39+
if (src_ptr == dst_ptr) {
40+
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
41+
<< dst_place;
42+
return;
43+
}
3944
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
4045
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
4146
}
@@ -71,6 +76,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
7176
auto stream =
7277
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
7378
if (platform::is_same_place(src_place, dst_place)) {
79+
if (src_ptr == dst_ptr) {
80+
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
81+
<< dst_place;
82+
return;
83+
}
7484
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
7585
stream);
7686
} else {
@@ -114,6 +124,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
114124
auto dst_ptr = dst->mutable_data(dst_place, src.type());
115125
auto size = src.numel() * SizeOfType(src.type());
116126
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
127+
if (src_ptr == dst_ptr) {
128+
VLOG(3) << "Skip copy the same data from " << src_place << " to "
129+
<< dst_place;
130+
return;
131+
}
117132
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
118133
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
119134
}
@@ -130,6 +145,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
130145
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
131146
} else if (platform::is_gpu_place(src_place) &&
132147
platform::is_gpu_place(dst_place)) {
148+
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
149+
VLOG(3) << "Skip copy the same data from " << src_place << " to "
150+
<< dst_place;
151+
return;
152+
}
133153
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
134154
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
135155
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);

paddle/fluid/framework/tensor_util_test.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ TEST(TensorCopy, Tensor) {
4141
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
4242
}
4343

44+
TensorCopy(dst_tensor, *cpu_place, &dst_tensor);
45+
for (size_t i = 0; i < 9; ++i) {
46+
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
47+
}
48+
4449
EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
4550

4651
Tensor slice_tensor = src_tensor.Slice(1, 2);
@@ -82,6 +87,15 @@ TEST(TensorCopy, Tensor) {
8287
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
8388
}
8489

90+
// Copy the same tensor
91+
TensorCopy(gpu_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
92+
gpu_ctx.Wait();
93+
const int* dst_ptr_tmp = dst_tensor.data<int>();
94+
EXPECT_NE(src_ptr, dst_ptr_tmp);
95+
for (size_t i = 0; i < 9; ++i) {
96+
EXPECT_EQ(src_ptr[i], dst_ptr_tmp[i]);
97+
}
98+
8599
Tensor slice_tensor = src_tensor.Slice(1, 2);
86100

87101
// CPU Slice Tensor to GPU Tensor

paddle/fluid/framework/var_desc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class VarDesc {
5959
public:
6060
explicit VarDesc(const std::string &name) {
6161
desc_.set_name(name);
62+
// TODO(paddle-dev): Why default to lodtensor.
6263
desc_.mutable_type()->set_type(proto::VarType::LOD_TENSOR);
6364
}
6465

paddle/fluid/framework/variable.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ class Variable {
3838

3939
template <typename T>
4040
T* GetMutable() {
41-
if (!IsType<T>()) {
41+
if (!holder_) {
4242
holder_.reset(new PlaceholderImpl<T>(new T()));
43+
} else {
44+
PADDLE_ENFORCE(IsType<T>(),
45+
"Variable must be type %s, the holding type is %s",
46+
typeid(T).name(), holder_->Type().name());
4347
}
4448
return static_cast<T*>(holder_->Ptr());
4549
}

paddle/fluid/framework/variable_test.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ TEST(Variable, GetMutable) {
3333
const Tensor& tt = v->Get<Tensor>();
3434
EXPECT_EQ(1234, tt.content_);
3535

36-
std::string* s = v->GetMutable<std::string>();
37-
*s = "hello";
38-
39-
const std::string& ss = v->Get<std::string>();
40-
EXPECT_EQ("hello", ss);
36+
try {
37+
v->GetMutable<std::string>();
38+
} catch (std::exception& e) {
39+
return;
40+
}
41+
EXPECT_TRUE(false);
4142
}

0 commit comments

Comments
 (0)