Skip to content

Commit bdc8295

Browse files
committed
Merge branch 'develop' of github.com:baidu/Paddle into feature/make_lod_a_share_ptr
2 parents 0cfb546 + 5e90f5e commit bdc8295

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1785
-276
lines changed

cmake/external/mkldnn.cmake

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,30 @@ ExternalProject_Add(
6363
-DMKLROOT:PATH=${MKLML_ROOT}
6464
)
6565

66-
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
67-
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
68-
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
66+
ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
67+
SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
68+
ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT})
6969
MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
7070
add_definitions(-DPADDLE_WITH_MKLDNN)
71-
LIST(APPEND external_project_dependencies mkldnn)
71+
LIST(APPEND external_project_dependencies shared_mkldnn)
72+
73+
# generate a static dummy target to track mkldnn dependencies
74+
# for cc_library(xxx SRCS xxx.c DEPS mkldnn)
75+
SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c)
76+
FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
77+
ADD_LIBRARY(mkldnn STATIC ${dummyfile})
78+
TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB})
79+
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
80+
81+
# copy the real so.0 lib to install dir
82+
# it can be directly contained in wheel or capi
83+
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
84+
ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
85+
COMMAND cp ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
86+
DEPENDS mkldnn)
87+
ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
88+
89+
IF(WITH_C_API)
90+
INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib)
91+
ENDIF()
92+

cmake/external/mklml.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,7 @@ ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
6666
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
6767
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
6868
LIST(APPEND external_project_dependencies mklml)
69+
70+
IF(WITH_C_API)
71+
INSTALL(FILES ${MKLML_LIB} ${MKLML_IOMP_LIB} DESTINATION lib)
72+
ENDIF()

doc/design/ci_build_whl.png

280 KB
Loading

doc/design/releasing_process.md

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,9 @@ PaddlePaddle每次发新的版本,遵循以下流程:
77
1. 从`develop`分支派生出新的分支,分支名为`release/版本号`。例如,`release/0.10.0`
88
1. 将新分支的版本打上tag,tag为`版本号rc.Patch号`。第一个tag为`0.10.0rc1`,第二个为`0.10.0rc2`,依次类推。
99
1. 对这个版本的提交,做如下几个操作:
10+
* 使用Regression Test List作为检查列表,测试本次release的正确性。
11+
* 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,到第二步
1012
* 修改`python/setup.py.in`中的版本信息,并将`istaged`字段设为`True`。
11-
* 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步
12-
* 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。
13-
* 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性
14-
* 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步
1513
* 编译这个版本的python wheel包,并发布到pypi。
1614
* 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。
1715
* pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。
@@ -21,8 +19,8 @@ PaddlePaddle每次发新的版本,遵循以下流程:
2119
pip install twine
2220
twine upload dist/[package to upload]
2321
```
22+
* 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步
2423
1. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。
25-
1. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面
2624
1. 协同完成Release Note的书写
2725

2826

@@ -31,6 +29,30 @@ PaddlePaddle每次发新的版本,遵循以下流程:
3129
* `release/版本号`分支一旦建立,一般不允许再从`develop`分支合入`release/版本号`。这样保证`release/版本号`分支功能的封闭,方便测试人员测试PaddlePaddle的行为。
3230
* 在`release/版本号`分支存在的时候,如果有bugfix的行为,需要将bugfix的分支同时merge到`master`、`develop`和`release/版本号`这三个分支。
3331

32+
## 发布wheel包到pypi
33+
34+
使用[PaddlePaddle CI](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview)
35+
完成自动化二进制编译,参考下图,选择需要发布的版本(通常包含一个CPU版本和一个GPU版本),点击"run"右侧的"..."按钮,可以
36+
弹出下面的选择框,在第二个tab (Changes)里选择需要发布的分支,这里选择0.11.0,然后点击"Run Build"按钮。等待编译完成后
37+
可以在此页面的"Artifacts"下拉框中找到生成的3个二进制文件,分别对应CAPI、`cp27m`和`cp27mu`的版本。然后按照上述的方法
38+
使用`twine`工具上传即可。
39+
40+
<img src="ci_build_whl.png">
41+
42+
* 注:CI环境使用 https://github.com/PaddlePaddle/buildtools 这里的DockerImage作为编译环境以支持更多的Linux
43+
发行版,如果需要手动编译,也可以使用这些镜像。这些镜像也可以从 https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ 下载得到。
44+
* pypi不支持覆盖上传,所以一个版本号的wheel包发布之后,不可以更改。下一个wheel包需要更新版本号才可以上传。
45+
46+
## 发布Docker镜像
47+
48+
上述PaddlePaddle CI编译wheel完成后会自动将Docker镜像push到DockerHub,所以,发布Docker镜像只需要对自动push的镜像打上
49+
版本号对应的tag即可:
50+
51+
1. 进入 https://hub.docker.com/r/paddlepaddle/paddle/tags/ 查看latest tag的更新时间,确认其在上述编译wheel包完成之后,即镜像为最新。
52+
1. 执行 `docker pull paddlepaddle/paddle:[latest tag]`,latest tag可以是latest或latest-gpu等。
53+
1. 执行 `docker tag paddlepaddle/paddle:[latest tag] paddlepaddle/paddle:[version]`
54+
1. 执行 `docker push paddlepaddle/paddle:[version]`
55+
3456
## PaddlePaddle 分支规范
3557

3658
PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范,并适应github的特性做了一些区别。

paddle/framework/CMakeLists.txt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
3232
cc_library(scope SRCS scope.cc DEPS glog threadpool)
3333
cc_test(scope_test SRCS scope_test.cc DEPS scope)
3434

35-
cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor framework_proto)
35+
cc_library(device_data_transform SRCS device_data_transform.cc DEPS tensor)
36+
37+
cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor framework_proto selected_rows device_data_transform)
3638
cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context)
3739

3840
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
@@ -41,7 +43,7 @@ device_context)
4143
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
4244
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
4345
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
44-
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute)
46+
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
4547
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
4648
shape_inference data_transform)
4749
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
@@ -73,9 +75,10 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
7375
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
7476
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
7577

76-
77-
cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece)
78+
cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece operator)
7879
cc_test(init_test SRCS init_test.cc DEPS init)
7980

8081
cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
8182
cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
83+
nv_test(device_data_transform_test SRCS device_data_transform_test.cu
84+
DEPS operator op_registry init math_function)

paddle/framework/backward.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,8 @@ std::vector<std::unique_ptr<OpDesc>> MakeBlockBackward(
427427
VLOG(5) << "Making backward " << (*it)->Type() << " op";
428428
std::vector<std::unique_ptr<OpDesc>> op_grads;
429429

430-
if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") {
430+
if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" ||
431+
(*it)->Type() == "parallel_do") {
431432
int step_block_idx = (*it)->GetBlockAttr("sub_block");
432433
BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
433434
grad_to_var, step_block_idx);

paddle/framework/data_transform.cc

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ limitations under the License. */
1414
#include <functional>
1515

1616
#include "paddle/framework/data_transform.h"
17+
#include "paddle/framework/device_data_transform.h"
1718
#include "paddle/framework/lod_tensor.h"
19+
#include "paddle/framework/selected_rows.h"
1820
#include "paddle/platform/device_context.h"
1921

2022
namespace paddle {
@@ -25,6 +27,37 @@ DataTransformFnMap& DataTransformFnMap::Instance() {
2527
return data_transform_map;
2628
}
2729

30+
Tensor* DataTransform(const OpKernelType& expected_kernel_type,
31+
const OpKernelType& kernel_type_for_var,
32+
const Tensor& input_tensor) {
33+
Tensor* out = nullptr;
34+
if (!platform::is_same_place(kernel_type_for_var.place_,
35+
expected_kernel_type.place_)) {
36+
out = DeviceTransform(input_tensor, expected_kernel_type.place_);
37+
}
38+
PADDLE_ENFORCE_NOT_NULL(out, "out should not be null");
39+
return out;
40+
}
41+
42+
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
43+
Variable& out_var) {
44+
if (in_var.IsType<LoDTensor>()) {
45+
auto& in_lod_tensor = in_var.Get<LoDTensor>();
46+
auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
47+
tran_lod_tensor->set_lod(in_lod_tensor.lod());
48+
tran_lod_tensor->set_layout(in_lod_tensor.layout());
49+
tran_lod_tensor->ShareDataWith(tensor);
50+
} else if (in_var.IsType<SelectedRows>()) {
51+
auto& in_selected_rows = in_var.Get<SelectedRows>();
52+
auto* trans_selected_rows = out_var.GetMutable<SelectedRows>();
53+
trans_selected_rows->set_height(in_selected_rows.height());
54+
trans_selected_rows->set_rows(in_selected_rows.rows());
55+
trans_selected_rows->mutable_value()->ShareDataWith(tensor);
56+
} else {
57+
PADDLE_THROW("unknown var type");
58+
}
59+
}
60+
2861
auto KernelFP32 = OpKernelType(proto::DataType::FP32, platform::CPUPlace(),
2962
DataLayout::kNHWC, LibraryType::kPlain);
3063

@@ -37,6 +70,28 @@ auto KernelNHWC = OpKernelType(proto::DataType::FP64, platform::CPUPlace(),
3770
auto KernelNCHW = OpKernelType(proto::DataType::FP64, platform::CPUPlace(),
3871
DataLayout::kNCHW, LibraryType::kPlain);
3972

73+
// TODO(dzhwinter): Only for testing multiple op kernel.
74+
// Dummy transform function for library_type
75+
// should be removed.
76+
auto KernelPlain = OpKernelType(proto::DataType::FP32, platform::CUDAPlace(0),
77+
DataLayout::kAnyLayout, LibraryType::kPlain);
78+
79+
auto KernelCUDNN = OpKernelType(proto::DataType::FP32, platform::CUDAPlace(0),
80+
DataLayout::kAnyLayout, LibraryType::kCUDNN);
81+
82+
void DummyTrans(const platform::DeviceContext* ctx,
83+
const KernelTypePair& kernel_pair, const Variable& in,
84+
Variable* out) {
85+
PADDLE_ENFORCE(in.IsType<Tensor>(), "Only Support Tensor transform!.");
86+
PADDLE_ENFORCE(
87+
platform::places_are_same_class(kernel_pair.first.place_,
88+
kernel_pair.second.place_),
89+
"TransDataType Only Support DataType transform on same place!");
90+
auto src = in.Get<Tensor>();
91+
auto* dst = out->GetMutable<Tensor>();
92+
*dst = src;
93+
}
94+
4095
void TransDataType(const platform::DeviceContext* ctx,
4196
const KernelTypePair& kernel_pair, const Variable& in,
4297
Variable* out) {
@@ -121,6 +176,8 @@ std::vector<int> NCHW2NHWC = {0, 2, 3, 1};
121176
}
122177

123178
REGISTER_DATA_TRANSFORM_FN(f::KernelFP32, f::KernelFP64, f::TransDataType);
179+
REGISTER_DATA_TRANSFORM_FN(f::KernelPlain, f::KernelCUDNN, f::DummyTrans);
180+
REGISTER_DATA_TRANSFORM_FN(f::KernelCUDNN, f::KernelPlain, f::DummyTrans);
124181
REGISTER_DATA_TRANSFORM_FN(f::KernelNHWC, f::KernelNCHW,
125182
std::bind(f::TransDataLayout, NHWC2NCHW,
126183
std::placeholders::_1,

paddle/framework/data_transform.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ limitations under the License. */
1919
#include <vector>
2020

2121
#include "paddle/framework/op_kernel_type.h"
22+
#include "paddle/framework/selected_rows.h"
2223
#include "paddle/framework/tensor.h"
2324
#include "paddle/framework/variable.h"
2425
#include "paddle/operators/math/math_function.h"
@@ -49,6 +50,13 @@ struct KernelTypePairHash {
4950
}
5051
};
5152

53+
Tensor* DataTransform(const OpKernelType& expected_kernel_type,
54+
const OpKernelType& kernel_type_for_var,
55+
const Tensor& input_tensor);
56+
57+
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
58+
Variable& out_var);
59+
5260
template <typename InType, typename OutType>
5361
struct CastDataTypeFunctor {
5462
HOSTDEVICE inline OutType operator()(InType in) const {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. */
13+
14+
#include "paddle/framework/device_data_transform.h"
15+
16+
namespace paddle {
17+
namespace framework {
18+
19+
static const platform::DeviceContext* GetDeviceContext(
20+
const platform::Place& src_place, const platform::Place& dst_place) {
21+
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
22+
23+
if (platform::is_gpu_place(src_place) && platform::is_cpu_place(dst_place)) {
24+
return pool.Get(src_place);
25+
} else if (platform::is_cpu_place(src_place) &&
26+
platform::is_gpu_place(dst_place)) {
27+
return pool.Get(dst_place);
28+
} else {
29+
PADDLE_THROW(
30+
"Currently, model parallelism is only supported between CPU and CUDA");
31+
}
32+
}
33+
34+
Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
35+
VLOG(3) << "DeviceTransform in, src_place " << in.place()
36+
<< " dst_place: " << dst_place;
37+
Tensor* out = new Tensor();
38+
auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
39+
dev_ctx->Wait();
40+
CopyFrom(in, dst_place, *dev_ctx, out);
41+
dev_ctx->Wait();
42+
return out;
43+
}
44+
45+
} // namespace framework
46+
} // namespace paddle
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. */
13+
14+
#pragma once
15+
16+
#include "paddle/framework/lod_tensor.h"
17+
#include "paddle/framework/tensor.h"
18+
#include "paddle/framework/tensor_util.h"
19+
#include "paddle/platform/device_context.h"
20+
21+
namespace paddle {
22+
namespace framework {
23+
24+
Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place);
25+
26+
} // namespace framework
27+
} // namespace paddle

0 commit comments

Comments
 (0)