Skip to content

Commit 1ba3d29

Browse files
committed
update code
2 parents 99a6c5d + 6ecbf08 commit 1ba3d29

File tree

784 files changed

+24400
-10274
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

784 files changed

+24400
-10274
lines changed

CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ cmake_minimum_required(VERSION 3.0)
1616
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
1717
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
1818
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
19-
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
20-
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
2119

2220
include(system)
2321

2422
project(paddle CXX C Go)
25-
message(STATUS "CXX compiler: " ${CMAKE_CXX_COMPILER} ", version: " ${CMAKE_CXX_COMPILER_VERSION})
26-
message(STATUS "C compiler: " ${CMAKE_C_COMPILER} ", version: " ${CMAKE_C_COMPILER_VERSION})
23+
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
24+
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
25+
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
26+
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
2727

2828
find_package(Sphinx)
2929
if(NOT CMAKE_CROSSCOMPILING)
@@ -201,6 +201,10 @@ if(WITH_GOLANG)
201201
endif(WITH_GOLANG)
202202

203203
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
204+
205+
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
206+
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
207+
204208
add_subdirectory(paddle)
205209
if(WITH_PYTHON)
206210
add_subdirectory(python)

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33

44
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
5-
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://doc.paddlepaddle.org/develop/doc/)
6-
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://doc.paddlepaddle.org/develop/doc_cn/)
5+
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
6+
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
77
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
88
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
99
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -36,7 +36,7 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
3636
examples:
3737

3838
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
39-
(e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
39+
(e.g. MKL, OpenBLAS, cuBLAS) or customized CPU/GPU kernels.
4040
- Highly optimized recurrent networks which can handle **variable-length**
4141
sequence without padding.
4242
- Optimized local and distributed training for models with high dimensional
@@ -61,32 +61,32 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
6161
## Installation
6262

6363
It is recommended to check out the
64-
[Docker installation guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
64+
[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html)
6565
before looking into the
66-
[build from source guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html).
66+
[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/build_from_source_en.html).
6767

6868
## Documentation
6969

70-
We provide [English](http://doc.paddlepaddle.org/develop/doc/) and
71-
[Chinese](http://doc.paddlepaddle.org/doc_cn/) documentation.
70+
We provide [English](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) and
71+
[Chinese](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) documentation.
7272

73-
- [Deep Learning 101](http://book.paddlepaddle.org/index.html)
73+
- [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html)
7474

7575
You might want to start from this online interactive book that can run in a Jupyter Notebook.
7676

77-
- [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
77+
- [Distributed Training](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/usage/cluster/cluster_train_en.html)
7878

7979
You can run distributed training jobs on MPI clusters.
8080

81-
- [Distributed Training on Kubernetes](http://doc.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
81+
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/usage/cluster/k8s_en.html)
8282

8383
You can also run distributed training jobs on Kubernetes clusters.
8484

85-
- [Python API](http://doc.paddlepaddle.org/develop/doc/api/index_en.html)
85+
- [Python API](http://www.paddlepaddle.org/docs/develop/documentation/en/api/index_en.html)
8686

8787
Our new API enables much shorter programs.
8888

89-
- [How to Contribute](http://doc.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
89+
- [How to Contribute](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/dev/contribute_to_paddle_en.html)
9090

9191
We appreciate your contributions!
9292

benchmark/IntelOptimizedPaddle.md

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ Machine:
77

88
System: CentOS release 6.3 (Final), Docker 1.12.1.
99

10-
PaddlePaddle: (TODO: will rerun after 0.11.0)
11-
- paddlepaddle/paddle:latest (for MKLML and MKL-DNN)
10+
PaddlePaddle:
11+
- paddlepaddle/paddle:0.11.0 (for MKLML and MKL-DNN)
1212
- MKL-DNN tag v0.11
1313
- MKLML 2018.0.1.20171007
14-
- paddlepaddle/paddle:latest-openblas (for OpenBLAS)
14+
- paddlepaddle/paddle:0.11.0-openblas (for OpenBLAS)
1515
- OpenBLAS v0.2.20
1616

1717
On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively.
@@ -22,6 +22,7 @@ On each machine, we will test and compare the performance of training on single
2222

2323
#### Training
2424
Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
25+
Note that the speeds below include forward, backward, and parameter update time, so they cannot be compared directly with the benchmark from the Caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only covers forward and backward. The parameter update time can become very significant when the weights are large, especially for AlexNet.
2526

2627
Input image size - 3 * 224 * 224, Time: images/second
2728

@@ -55,33 +56,57 @@ Input image size - 3 * 224 * 224, Time: images/second
5556

5657
<img src="figs/googlenet-cpu-train.png" width="500">
5758

59+
- AlexNet
60+
61+
| BatchSize | 64 | 128 | 256 |
62+
|--------------|--------| ------ | -------|
63+
| OpenBLAS | 45.62 | 72.79 | 107.22 |
64+
| MKLML | 66.37 | 105.60 | 144.04 |
65+
| MKL-DNN | 399.00 | 498.94 | 626.53 |
66+
67+
<img src="figs/alexnet-cpu-train.png" width="500">
68+
5869
#### Inference
5970
Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
6071
- VGG-19
6172

6273
| BatchSize | 1 | 2 | 4 | 8 | 16 |
6374
|-----------|-------|-------|-------|-------|-------|
64-
| OpenBLAS | 1.07 | 1.08 | 1.06 | 0.88 | 0.65 |
75+
| OpenBLAS | 1.10 | 1.96 | 3.62 | 3.63 | 2.25 |
6576
| MKLML | 5.58 | 9.80 | 15.15 | 21.21 | 28.67 |
6677
| MKL-DNN | 75.07 | 88.64 | 82.58 | 92.29 | 96.75 |
6778

79+
<img src="figs/vgg-cpu-infer.png" width="500">
80+
6881
- ResNet-50
6982

7083
| BatchSize | 1 | 2 | 4 | 8 | 16 |
7184
|-----------|-------|--------|--------|--------|--------|
72-
| OpenBLAS | 3.35 | 3.19 | 3.09 | 2.55 | 1.96 |
85+
| OpenBLAS | 3.31 | 6.72 | 11.59 | 13.17 | 9.27 |
7386
| MKLML | 6.33 | 12.02 | 22.88 | 40.53 | 63.09 |
7487
| MKL-DNN | 107.83| 148.84 | 177.78 | 189.35 | 217.69 |
7588

89+
<img src="figs/resnet-cpu-infer.png" width="500">
7690

7791
- GoogLeNet
7892

7993
| BatchSize | 1 | 2 | 4 | 8 | 16 |
8094
|-----------|--------|--------|--------|--------|--------|
81-
| OpenBLAS | 12.04 | 11.31 | 10.00 | 9.07 | 4.34 |
95+
| OpenBLAS | 12.06 | 23.56 | 34.48 | 36.45 | 23.12 |
8296
| MKLML | 22.74 | 41.56 | 81.22 | 133.47 | 210.53 |
8397
| MKL-DNN | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 |
8498

99+
<img src="figs/googlenet-cpu-infer.png" width="500">
100+
101+
- AlexNet
102+
103+
| BatchSize | 1 | 2 | 4 | 8 | 16 |
104+
|-----------|--------|--------|--------|--------|--------|
105+
| OpenBLAS | 3.53 | 6.23 | 15.04 | 26.06 | 31.62 |
106+
| MKLML | 21.32 | 36.55 | 73.06 | 131.15 | 192.77 |
107+
| MKL-DNN | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 |
108+
109+
<img src="figs/alexnet-cpu-infer.png" width="500">
85110

86111
### Laptop
87112
TBD

benchmark/figs/alexnet-cpu-infer.png

15.1 KB
Loading

benchmark/figs/alexnet-cpu-train.png

15.6 KB
Loading
14.1 KB
Loading
996 Bytes
Loading

benchmark/figs/resnet-cpu-infer.png

13.7 KB
Loading

benchmark/figs/resnet-cpu-train.png

-2.2 KB
Loading

benchmark/figs/vgg-cpu-infer.png

13.7 KB
Loading

0 commit comments

Comments
 (0)