
Commit 0b29078

Merge branch 'develop' into grid_sampler
2 parents e99da0b + 0c319e0


64 files changed (+2048 additions, -576 deletions)

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
 option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
 option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
 option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
-option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
 option(WITH_CONTRIB "Compile the third-party contributation" OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_ANAKIN "Compile with Anakin library" OFF)

README.md

Lines changed: 11 additions & 11 deletions
@@ -2,8 +2,8 @@
 
 
 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.0/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
 

@@ -19,17 +19,17 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
 
 
-### Latest PaddlePaddle Release: [Fluid 1.0.1](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
+### Latest PaddlePaddle Release: [Fluid 1.1.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.1)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
 pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==1.0.1.post87
+pip install paddlepaddle-gpu==1.1.0.post87
 # Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==1.0.1.post85
+pip install paddlepaddle-gpu==1.1.0.post85
 
 # For installation on other platform, refer to http://paddlepaddle.org/
 ```

@@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==1.0.1.post85
 
 ## Installation
 
-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) on our website.
 
 ## Documentation
 
-We provide [English](http://paddlepaddle.org/documentation/docs/en/1.0.0/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) documentation.
 
 - [Deep Learning 101](https://github.com/PaddlePaddle/book)
 
   You might want to start from this online interactive book that can run in a Jupyter Notebook.
 
-- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.0/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.1/user_guides/howto/training/cluster_howto.html)
 
   You can run distributed training jobs on MPI clusters.
 
-- [Python API](http://paddlepaddle.org/documentation/api/zh/1.0/fluid.html)
+- [Python API](http://paddlepaddle.org/documentation/api/zh/1.1/fluid.html)
 
   Our new API enables much shorter programs.
 
-- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.0/advanced_usage/development/contribute_to_paddle.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.1/advanced_usage/development/contribute_to_paddle.html)
 
   We appreciate your contributions!
 
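A quick way to confirm that the pip lines above actually installed the 1.1.0 wheel is to print the package version. This is a minimal sketch, not part of the diff; it assumes the installed `paddle` package exposes `__version__`, which this commit does not show.

```
import paddle

# Expected to report 1.1.0 when one of the pip commands from the README above was used.
print(paddle.__version__)
```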

paddle/fluid/API.spec

Lines changed: 2 additions & 1 deletion
@@ -64,7 +64,7 @@ paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', '
 paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
 paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
 paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
-paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))

@@ -174,6 +174,7 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
 paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
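For orientation, here is a minimal, hypothetical usage sketch of the two API.spec entries touched above: `sequence_pool`, which now takes an `is_test` flag (default `False`), and the newly listed `affine_grid`. It is not part of the commit; it assumes only the ArgSpecs shown here plus the standard `fluid.layers.data` helper, and the `pool_type='max'` and `out_shape` values are illustrative.

```
import paddle.fluid as fluid

# sequence_pool: the updated ArgSpec adds an is_test keyword, defaulting to False.
x = fluid.layers.data(name='x', shape=[10], dtype='float32', lod_level=1)
pooled = fluid.layers.sequence_pool(input=x, pool_type='max', is_test=True)

# affine_grid: per the new ArgSpec, it takes a batch of affine matrices (theta)
# and a target output shape, and returns a sampling grid.
theta = fluid.layers.data(name='theta', shape=[2, 3], dtype='float32')
grid = fluid.layers.affine_grid(theta=theta, out_shape=[1, 3, 28, 28])
```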

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
 # device_context reduce_op_handle )
 cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
         DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context)
+cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle)
 
 cc_library(build_strategy SRCS build_strategy.cc DEPS
         graph_viz_pass multi_devices_graph_pass

paddle/fluid/framework/details/all_reduce_op_handle.cc

Lines changed: 3 additions & 3 deletions
@@ -34,7 +34,7 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
       nccl_ctxs_(ctxs) {
   if (nccl_ctxs_) {
     for (auto &p : places_) {
-      this->dev_ctxes_[p] = nccl_ctxs_->DevCtx(p);
+      this->SetDeviceContext(p, nccl_ctxs_->DevCtx(p));
     }
   }
 }

@@ -46,7 +46,7 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
 #endif
 
 void AllReduceOpHandle::RunImpl() {
-  platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
+  platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second);
 
   if (NoDummyInputSize() == 1) {
     return;  // No need to all reduce when GPU count = 1;

@@ -127,7 +127,7 @@ void AllReduceOpHandle::RunImpl() {
         *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
     auto &p = places_[i];
     auto *var = scope.FindVar(out_var_handles[i]->name_);
-    auto *dev_ctx = dev_ctxes_[p];
+    auto *dev_ctx = dev_ctxes_.at(p);
 
     RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {
       auto &tensor_gpu = *var->GetMutable<framework::LoDTensor>();

paddle/fluid/framework/details/broadcast_op_handle.h

Lines changed: 2 additions & 1 deletion
@@ -44,7 +44,8 @@ struct BroadcastOpHandle : public OpHandleBase {
         nccl_ctxs_(nccl_ctxs) {
     if (nccl_ctxs_) {
       for (auto &p_ctx : nccl_ctxs_->contexts_) {
-        dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get();
+        this->SetDeviceContext(platform::CUDAPlace(p_ctx.first),
+                               p_ctx.second.ctx_.get());
       }
     }
   }

paddle/fluid/framework/details/broadcast_op_handle_test.cc

Lines changed: 1 addition & 221 deletions
@@ -12,232 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/details/broadcast_op_handle.h"
-#include "gtest/gtest.h"
-
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
 
 namespace paddle {
 namespace framework {
 namespace details {
 
-namespace f = paddle::framework;
-namespace p = paddle::platform;
-
-// test data amount
-const f::DDim kDims = {20, 20};
-
-struct TestBroadcastOpHandle {
-  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
-  std::vector<Scope*> local_scopes_;
-  std::vector<Scope*> param_scopes_;
-  Scope g_scope_;
-  std::unique_ptr<OpHandleBase> op_handle_;
-  std::vector<std::unique_ptr<VarHandleBase>> vars_;
-  std::vector<p::Place> gpu_list_;
-  bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
-  std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
-#endif
-
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
-#ifdef PADDLE_WITH_CUDA
-    if (nccl_ctxs_) {
-      nccl_ctxs_->WaitAll();
-    }
-#endif
-  }
-
-  void InitCtxOnGpu(bool use_gpu) {
-    use_gpu_ = use_gpu;
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      int count = p::GetCUDADeviceCount();
-      if (count <= 1) {
-        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
-                        "device count is "
-                     << count;
-        exit(0);
-      }
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CUDAPlace(i);
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CUDADeviceContext(p));
-      }
-      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-      int count = 8;
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CPUPlace();
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CPUDeviceContext(p));
-      }
-#ifdef PADDLE_WITH_CUDA
-      nccl_ctxs_.reset(nullptr);
-#endif
-    }
-  }
-
-  void InitBroadcastOp(size_t input_scope_idx) {
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scopes_.push_back(&(g_scope_.NewScope()));
-      Scope& local_scope = local_scopes_.back()->NewScope();
-      *local_scopes_.back()
-           ->Var(details::kLocalExecScopeName)
-           ->GetMutable<Scope*>() = &local_scope;
-      local_scope.Var("out");
-      param_scopes_.emplace_back(&local_scope);
-    }
-    param_scopes_[input_scope_idx]->Var("input");
-
-    std::unique_ptr<ir::Node> n =
-        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation);
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      op_handle_.reset(
-          new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_));
-#endif
-    }
-
-    std::unique_ptr<ir::Node> v =
-        ir::CreateNodeForTest("node1", ir::Node::Type::kVariable);
-    auto* in_var_handle = new VarHandle(v.get(), 1, input_scope_idx, "input",
-                                        gpu_list_[input_scope_idx]);
-    vars_.emplace_back(in_var_handle);
-    op_handle_->AddInput(in_var_handle);
-
-    // add dummy var
-
-    std::unique_ptr<ir::Node> v2 =
-        ir::CreateNodeForTest("node2", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v2.get()));
-    DummyVarHandle* dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddInput(dummy_var_handle);
-
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      if (!use_gpu_) {
-        op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
-      }
-      std::unique_ptr<ir::Node> v3 =
-          ir::CreateNodeForTest("node3", ir::Node::Type::kVariable);
-      VarHandle* out_var_handle =
-          new VarHandle(v3.get(), 2, j, "out", gpu_list_[j]);
-      vars_.emplace_back(out_var_handle);
-      op_handle_->AddOutput(out_var_handle);
-    }
-
-    // add dummy var
-    std::unique_ptr<ir::Node> v4 =
-        ir::CreateNodeForTest("node4", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v4.get()));
-    DummyVarHandle* out_dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    out_dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddOutput(out_dummy_var_handle);
-  }
-
-  void TestBroadcastLodTensor(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
-    in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    f::LoD lod{{0, 10, 20}};
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), in_lod_tensor);
-    in_lod_tensor->set_lod(lod);
-    in_lod_tensor->Resize(kDims);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto out_tensor = out_var->Get<f::LoDTensor>();
-      PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(out_tensor, cpu_place, &result_tensor);
-      float* ct = result_tensor.mutable_data<float>(cpu_place);
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-
-  void TestBroadcastSelectedRows(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
-    auto value = in_selected_rows->mutable_value();
-    value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-    int height = static_cast<int>(kDims[0]) * 2;
-    std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
-                              2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
-    in_selected_rows->set_height(height);
-    in_selected_rows->set_rows(rows);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), value);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto& out_select_rows = out_var->Get<f::SelectedRows>();
-      auto rt = out_select_rows.value();
-
-      PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
-                        "height is not equal.");
-      for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-        PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k]);
-      }
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(rt, cpu_place, &result_tensor);
-      float* ct = result_tensor.data<float>();
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-};
-
 TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
   TestBroadcastOpHandle test_op;
   size_t input_scope_idx = 0;
