
Commit 8efdc77

Merge branch 'develop' of github.com:baidu/Paddle into add_comments_to_v2_module
2 parents 88cb8ee + 8bb2613

13 files changed: +393 −106 lines changed

demo/image_classification/api_v2_train.py

Lines changed: 2 additions & 2 deletions

@@ -66,7 +66,7 @@ def event_handler(event):
             sys.stdout.flush()
     if isinstance(event, paddle.event.EndPass):
         result = trainer.test(
-            reader=paddle.reader.batched(
+            reader=paddle.batch(
                 paddle.dataset.cifar.test10(), batch_size=128),
             reader_dict={'image': 0,
                          'label': 1})
@@ -77,7 +77,7 @@ def event_handler(event):
         parameters=parameters,
         update_equation=momentum_optimizer)
     trainer.train(
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
             paddle.reader.shuffle(
                 paddle.dataset.cifar.train10(), buf_size=50000),
             batch_size=128),
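This rename is the core of the change: `paddle.reader.batched` becomes `paddle.batch`. A minimal sketch of the new entry point (the toy reader below is a hypothetical stand-in, not code from this repo):

```python
import paddle.v2 as paddle

# A v2-style reader: a no-argument callable that returns a sample generator.
def toy_reader():
    for i in range(10):
        yield [float(i)], i % 2  # (feature vector, label)

# paddle.batch wraps the sample-level reader into a minibatch reader.
batched = paddle.batch(toy_reader, batch_size=4)
for minibatch in batched():
    print minibatch  # a list of up to 4 (feature, label) samples
```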

demo/mnist/api_train_v2.py

Lines changed: 2 additions & 2 deletions

@@ -98,7 +98,7 @@ def event_handler(event):
                        result.metrics['classification_error_evaluator']))

    trainer.train(
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=128),
@@ -115,7 +115,7 @@ def event_handler(event):
    probs = paddle.infer(
        output=predict,
        parameters=parameters,
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
            paddle.reader.firstn(
                paddle.reader.map_readers(lambda item: (item[0], ),
                                          paddle.dataset.mnist.test()),
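The inference hunk also shows how v2 reader decorators compose. A small sketch of the two decorators used above, with a hypothetical toy reader in place of the mnist dataset:

```python
import paddle.v2 as paddle

def toy_mnist():
    for i in range(100):
        yield (i, i % 10)  # (image stand-in, label)

# map_readers applies a function to every sample; here it drops the label,
# because inference only consumes the image field.
images_only = paddle.reader.map_readers(lambda item: (item[0], ), toy_mnist)

# firstn truncates the stream to its first n samples.
first_three = paddle.reader.firstn(images_only, 3)

print list(first_three())  # [(0,), (1,), (2,)]
```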

demo/seqToseq/api_train_v2.py

Lines changed: 13 additions & 9 deletions

@@ -72,31 +72,35 @@ def main():
     # define network topology
     cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
     parameters = paddle.parameters.create(cost)
-    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
-
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 10 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)

+    # define optimize method and trainer
+    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
                                  update_equation=optimizer)

+    # define data reader
     reader_dict = {
         'source_language_word': 0,
         'target_language_word': 1,
         'target_language_next_word': 2
     }

-    trn_reader = paddle.reader.batched(
+    wmt14_reader = paddle.reader.batched(
         paddle.reader.shuffle(
             train_reader("data/pre-wmt14/train/train"), buf_size=8192),
         batch_size=5)

+    # define event_handler callback
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 10 == 0:
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+
+    # start to train
     trainer.train(
-        reader=trn_reader,
+        reader=wmt14_reader,
         event_handler=event_handler,
         num_passes=10000,
         reader_dict=reader_dict)
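For readers unfamiliar with `reader_dict`: it maps each data layer name to a column index in the sample tuples the reader yields. A tiny illustration (the sample tuple below is made up):

```python
# Each sample from the wmt14 reader is a 3-tuple:
#   (source word ids, target word ids, target next-word ids)
sample = ([2, 7, 9], [5, 1], [1, 4])

reader_dict = {
    'source_language_word': 0,
    'target_language_word': 1,
    'target_language_next_word': 2
}

# The trainer uses these indices to route each tuple slot to its data layer.
for name, idx in reader_dict.items():
    print name, '->', sample[idx]
```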

demo/seqToseq/seqToseq_net_v2.py

Lines changed: 39 additions & 37 deletions

@@ -1,8 +1,4 @@
-import paddle.v2.activation as activation
-import paddle.v2.attr as attr
-import paddle.v2.data_type as data_type
-import paddle.v2.layer as layer
-import paddle.v2.networks as networks
+import paddle.v2 as paddle


 def seqToseq_net_v2(source_dict_dim, target_dict_dim):
@@ -12,79 +8,85 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network

     #### Encoder
-    src_word_id = layer.data(
+    src_word_id = paddle.layer.data(
         name='source_language_word',
-        type=data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = layer.embedding(
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
         input=src_word_id,
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_source_language_embedding'))
-    src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = networks.simple_gru(
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = layer.concat(input=[src_forward, src_backward])
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])

     #### Decoder
-    with layer.mixed(size=decoder_size) as encoded_proj:
-        encoded_proj += layer.full_matrix_projection(input=encoded_vector)
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)

-    backward_first = layer.first_seq(input=src_backward)
+    backward_first = paddle.layer.first_seq(input=src_backward)

-    with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
-        decoder_boot += layer.full_matrix_projection(input=backward_first)
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)

     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):

-        decoder_mem = layer.memory(
+        decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

-        context = networks.simple_attention(
+        context = paddle.networks.simple_attention(
             encoded_sequence=enc_vec,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)

-        with layer.mixed(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += layer.full_matrix_projection(input=context)
-            decoder_inputs += layer.full_matrix_projection(input=current_word)
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)

-        gru_step = layer.gru_step(
+        gru_step = paddle.layer.gru_step(
             name='gru_decoder',
             input=decoder_inputs,
             output_mem=decoder_mem,
             size=decoder_size)

-        with layer.mixed(
-                size=target_dict_dim, bias_attr=True,
-                act=activation.Softmax()) as out:
-            out += layer.full_matrix_projection(input=gru_step)
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
         return out

     decoder_group_name = "decoder_group"
-    group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
-    group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]

-    trg_embedding = layer.embedding(
-        input=layer.data(
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
             name='target_language_word',
-            type=data_type.integer_value_sequence(target_dict_dim)),
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_target_language_embedding'))
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)

     # For a decoder equipped with an attention mechanism, in training
     # the target embedding (the ground truth) is the data input,
     # while the encoded source sequence is accessed as an unbounded memory.
     # Here, the StaticInput defines a read-only memory
     # for the recurrent_group.
-    decoder = layer.recurrent_group(
+    decoder = paddle.layer.recurrent_group(
         name=decoder_group_name,
         step=gru_decoder_with_attention,
         input=group_inputs)

-    lbl = layer.data(
+    lbl = paddle.layer.data(
         name='target_language_next_word',
-        type=data_type.integer_value_sequence(target_dict_dim))
-    cost = layer.classification_cost(input=decoder, label=lbl)
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)

     return cost
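The net effect of this file's change is cosmetic: the five per-submodule imports collapse into the single `paddle.v2` namespace. A minimal before/after sketch (the `dense_vector` data layer below is illustrative, not from this file):

```python
# Before: each submodule imported under its own alias.
#     import paddle.v2.layer as layer
#     import paddle.v2.data_type as data_type
#     img = layer.data(name='img', type=data_type.dense_vector(784))

# After: one top-level namespace; every call is spelled paddle.<module>.<name>.
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)
img = paddle.layer.data(
    name='img', type=paddle.data_type.dense_vector(784))
```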

demo/word2vec/train_v2.py

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+import math
+
+import paddle.v2 as paddle
+
+dictsize = 1953
+embsize = 32
+hiddensize = 256
+N = 5
+
+
+def wordemb(inlayer):
+    wordemb = paddle.layer.table_projection(
+        input=inlayer,
+        size=embsize,
+        param_attr=paddle.attr.Param(
+            name="_proj",
+            initial_std=0.001,
+            learning_rate=1,
+            l2_rate=0, ))
+    return wordemb
+
+
+def main():
+    paddle.init(use_gpu=False, trainer_count=1)
+    word_dict = paddle.dataset.imikolov.build_dict()
+    dict_size = len(word_dict)
+    firstword = paddle.layer.data(
+        name="firstw", type=paddle.data_type.integer_value(dict_size))
+    secondword = paddle.layer.data(
+        name="secondw", type=paddle.data_type.integer_value(dict_size))
+    thirdword = paddle.layer.data(
+        name="thirdw", type=paddle.data_type.integer_value(dict_size))
+    fourthword = paddle.layer.data(
+        name="fourthw", type=paddle.data_type.integer_value(dict_size))
+    nextword = paddle.layer.data(
+        name="fifthw", type=paddle.data_type.integer_value(dict_size))
+
+    Efirst = wordemb(firstword)
+    Esecond = wordemb(secondword)
+    Ethird = wordemb(thirdword)
+    Efourth = wordemb(fourthword)
+
+    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
+    hidden1 = paddle.layer.fc(input=contextemb,
+                              size=hiddensize,
+                              act=paddle.activation.Sigmoid(),
+                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
+                              bias_attr=paddle.attr.Param(learning_rate=2),
+                              param_attr=paddle.attr.Param(
+                                  initial_std=1. / math.sqrt(embsize * 8),
+                                  learning_rate=1))
+    predictword = paddle.layer.fc(input=hidden1,
+                                  size=dict_size,
+                                  bias_attr=paddle.attr.Param(learning_rate=2),
+                                  act=paddle.activation.Softmax())
+
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 100 == 0:
+                result = trainer.test(
+                    paddle.batch(
+                        paddle.dataset.imikolov.test(word_dict, N), 32))
+                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics,
+                    result.metrics)
+
+    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
+    parameters = paddle.parameters.create(cost)
+    adam_optimizer = paddle.optimizer.Adam(
+        learning_rate=3e-3,
+        regularization=paddle.optimizer.L2Regularization(8e-4))
+    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
+    trainer.train(
+        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
+        num_passes=30,
+        event_handler=event_handler)
+
+
+if __name__ == '__main__':
+    main()
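The new demo trains a 5-gram neural language model on the imikolov dataset. Assuming the v2 reader yields N-tuples of word indices (four context words plus the next word when N = 5, matching the five data layers above), one way to peek at a sample:

```python
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
# Each sample should be a 5-tuple of word ids: 4 context words + the next word.
print next(paddle.dataset.imikolov.train(word_dict, 5)())
```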

doc/api/v2/data.rst

Lines changed: 7 additions & 0 deletions

@@ -22,6 +22,13 @@ Reader
 .. automodule:: paddle.v2.reader.creator
    :members:

+#########
+minibatch
+#########
+
+.. automodule:: paddle.v2.minibatch
+   :members:
+
 #######
 Dataset
 #######

doc/howto/usage/k8s/k8s_distributed_cn.md

Lines changed: 56 additions & 22 deletions

@@ -43,22 +43,55 @@ docker push [YOUR_REPO]/paddle:mypaddle

 Note that `[YOUR_REPO]` in the command above stands for the Docker registry you use; replace it with your own registry address. Below, `[YOUR_REPO]/paddle:mypaddle` refers to the image built in this step.

-### Upload the training files
+### Prepare the training data

-This guide uses PaddlePaddle's official [recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation) as the training workload. We place the training files and data in a directory named after the job and upload it to the shared storage behind the volume (different distributed storage systems mount differently; mount the directory first, then copy the data in). When done, the volume contains roughly the following:
+Here we start a Job on the Kubernetes cluster to download and split the data; you can also customize the image by modifying [k8s_train](./src/k8s_train/README.md).

-```bash
-[root@paddle-kubernetes-node0 mfs]# tree -d
+Before starting the Job, bind a [persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/) that matches your distributed storage; the generated data will be stored under that volume.
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-data
+spec:
+  template:
+    metadata:
+      name: pi
+    spec:
+      hostNetwork: true
+      containers:
+      - name: paddle-data
+        image: paddledev/paddle-tutorial:k8s_data
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/mnt"
+          name: nfs
+        env:
+        - name: OUT_DIR
+          value: /home/work/mfs/paddle-cluster-job
+        - name: SPLIT_COUNT
+          value: "3"
+      volumes:
+      - name: nfs
+        persistentVolumeClaim:
+          claimName: mfs
+      restartPolicy: Never
+```
+
+When the Job finishes, the volume contains roughly the following:
+```bash
+[root@paddle-kubernetes-node0 nfsdir]$ tree -d
 .
-└── paddle-cluster-job
-    ├── data
-    │   ├── 0
-    │   ├── 1
-    │   └── 2
-    ├── output
-    └── recommendation
+`-- paddle-cluster-job
+    |-- 0
+    |   `-- data
+    |-- 1
+    |   `-- data
+    |-- 2
+    |   `-- data
+    |-- output
+    |-- quick_start
 ```

 Here paddle-cluster-job is the job name for this training run. The training requires 3 PaddlePaddle nodes; the split data sits under paddle-cluster-job/data, where folders 0, 1 and 2 correspond to the trainer_id of the 3 nodes. The recommendation folder holds the training files, and the output folder holds training results and logs.
@@ -118,15 +151,16 @@ spec:

 The `env` field holds the container's environment variables; we use it to pass some of `paddle`'s runtime parameters into the container.

-`JOB_PATH` is the mount path of the shared storage, `JOB_NAME` is the job name, and `TRAIN_CONFIG_DIR` is the directory holding this run's training files; combined, these three variables locate the files needed for training.
-
-`CONF_PADDLE_NIC` is the `--nics` argument required by the `paddle pserver` process, i.e. the network interface name.
-
-`CONF_PADDLE_PORT` is `paddle pserver`'s `--port` argument, and `CONF_PADDLE_PORTS_NUM` is the number of ports for dense updates, i.e. the `--ports_num` argument.
-
-`CONF_PADDLE_PORTS_NUM_SPARSE` is the number of ports for sparse updates, i.e. the `--ports_num_for_sparse` argument.
-
-`CONF_PADDLE_GRADIENT_NUM` is the number of training nodes, i.e. the `--num_gradient_servers` argument.
+Environment variable | Description
+--- | ---
+JOB_PATH | mount path of the shared storage
+JOB_NAME | name of the Job
+TRAIN_CONFIG_DIR | directory holding this run's training files; combined with JOB_PATH and JOB_NAME it locates the files needed for training
+CONF_PADDLE_NIC | the `--nics` argument required by the `paddle pserver` process, i.e. the network interface name
+CONF_PADDLE_PORT | `paddle pserver`'s `--port` argument
+CONF_PADDLE_PORTS_NUM | number of ports for dense updates, i.e. the `--ports_num` argument
+CONF_PADDLE_PORTS_NUM_SPARSE | number of ports for sparse updates, i.e. the `--ports_num_for_sparse` argument
+CONF_PADDLE_GRADIENT_NUM | number of training nodes, i.e. the `--num_gradient_servers` argument

 For detailed descriptions of these parameters, see [here](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication).
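To make the role of the three path variables concrete, a trainer entry script inside the container could assemble its config path like this (a hypothetical sketch, not code from this repo):

```python
import os

# JOB_PATH, JOB_NAME and TRAIN_CONFIG_DIR arrive through the pod's `env` section.
job_path = os.environ["JOB_PATH"]            # mount path of the shared storage
job_name = os.environ["JOB_NAME"]            # e.g. "paddle-cluster-job"
config_dir = os.environ["TRAIN_CONFIG_DIR"]  # directory with the training files

train_config = os.path.join(job_path, job_name, config_dir)
print "training files under:", train_config
```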
