Skip to content

Commit 694bc64

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into lstm
2 parents 17e3373 + 63ffe52 commit 694bc64

File tree

20 files changed

+959
-190
lines changed

20 files changed

+959
-190
lines changed
500 Bytes
Binary file not shown.

doc/howto/usage/cluster/cluster_train_cn.md

Lines changed: 221 additions & 95 deletions
Large diffs are not rendered by default.

doc/howto/usage/cluster/cluster_train_en.md

Lines changed: 232 additions & 95 deletions
Large diffs are not rendered by default.
142 KB
Loading
33.1 KB
Loading
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import gzip
2+
import math
3+
4+
import paddle.v2 as paddle
5+
6+
embsize = 32
7+
hiddensize = 256
8+
N = 5
9+
10+
11+
def wordemb(inlayer):
    """Return an embedding layer of width ``embsize`` for ``inlayer``.

    Every call uses the same parameter name ("_proj") and enables sparse
    updates.
    NOTE(review): the shared name presumably makes all context words use
    one embedding table -- confirm against the paddle.v2 documentation.
    """
    proj_attr = paddle.attr.Param(
        name="_proj",
        initial_std=0.001,
        learning_rate=1,
        l2_rate=0,
        sparse_update=True)
    return paddle.layer.embedding(
        input=inlayer, size=embsize, param_attr=proj_attr)
22+
23+
24+
def main():
    """Train an N-gram word-embedding language model on the imikolov data.

    The network embeds four context words, concatenates the embeddings,
    feeds them through a sigmoid hidden layer (dropout 0.5) and a softmax
    layer over the dictionary, and is trained with AdaGrad for 30 passes
    in batches of 32. Every 100 batches the parameters are saved to
    "batch-<batch_id>.tar.gz" and the model is evaluated on the test set.
    """
    # for local training
    cluster_train = False

    if not cluster_train:
        paddle.init(use_gpu=False, trainer_count=1)
    else:
        paddle.init(
            use_gpu=False,
            trainer_count=2,
            port=7164,
            ports_num=1,
            ports_num_for_sparse=1,
            num_gradient_servers=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        # `trainer` is bound later in main(); the closure resolves it when
        # the handler is actually invoked during training.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
                               'w') as f:
                    trainer.save_parameter_to_tar(f)
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                # Parenthesised single-argument form: prints the same text
                # under Python 2 and is also valid Python 3 syntax.
                print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics))

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)

    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)
97+
98+
99+
if __name__ == '__main__':
100+
main()
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import math
2+
import os
3+
import paddle.v2 as paddle
4+
import pickle
5+
6+
embsize = 32
7+
hiddensize = 256
8+
N = 5
9+
cluster_train_file = "./train_data_dir/train/train.txt"
10+
cluster_test_file = "./test_data_dir/test/test.txt"
11+
# Rank of this process as exported in OMPI_COMM_WORLD_RANK; it selects which
# data shard the readers below open. The script refuses to start without it.
node_id = os.getenv("OMPI_COMM_WORLD_RANK")
if not node_id:
    raise EnvironmentError("must provide OMPI_COMM_WORLD_RANK")
14+
15+
16+
def wordemb(inlayer):
    """Return an embedding layer of width ``embsize`` for ``inlayer``.

    Every call uses the same parameter name ("_proj") and enables sparse
    updates.
    NOTE(review): the shared name presumably makes all context words use
    one embedding table -- confirm against the paddle.v2 documentation.
    """
    proj_attr = paddle.attr.Param(
        name="_proj",
        initial_std=0.001,
        learning_rate=1,
        l2_rate=0,
        sparse_update=True)
    return paddle.layer.embedding(
        input=inlayer, size=embsize, param_attr=proj_attr)
27+
28+
29+
def cluster_reader_cluster(filename, node_id):
    """Create a reader over one node's shard of a comma-separated data file.

    The shard path is ``filename`` + "-" + ``node_id`` zero-padded to five
    digits (for example ``train.txt-00002``).

    Args:
        filename: common path prefix of the shard files.
        node_id: shard index; anything ``int()`` accepts.

    Returns:
        A zero-argument callable. Each call opens the shard and yields one
        tuple of ints per non-blank line.

    Raises:
        IOError/OSError when the shard cannot be opened and ValueError when
        a cell is not an integer; both surface while iterating the reader.
    """

    def cluster_reader():
        with open("-".join([filename, "%05d" % int(node_id)]), "r") as f:
            for line in f:
                # A blank (e.g. trailing) line would make int() raise
                # ValueError, so it is skipped instead of aborting the pass.
                if not line.strip():
                    continue
                yield tuple(int(cell) for cell in line.split(","))

    return cluster_reader
37+
38+
39+
def main():
    """Train the N-gram word-embedding model, configured from the environment.

    PADDLE_CLUSTER_TRAIN selects local or cluster mode and the PADDLE_INIT_*
    variables supply the arguments for ``paddle.init``. The word dictionary
    is unpickled from "thirdparty/wuyi_train_thdpty/word_dict.pickle", and
    training/test samples come from this node's shard of
    ``cluster_train_file`` / ``cluster_test_file``. Training runs 30 passes
    in batches of 32; every 100 batches the model is evaluated on the test
    shard and the metrics are printed.
    """
    # get arguments from env

    # for local training
    TRUTH = ["true", "True", "TRUE", "1", "yes", "Yes", "YES"]
    cluster_train = os.getenv('PADDLE_CLUSTER_TRAIN', "False") in TRUTH
    # NOTE(review): unlike cluster_train, use_gpu is forwarded as the raw
    # string from the environment -- confirm paddle.init parses it as
    # intended.
    use_gpu = os.getenv('PADDLE_INIT_USE_GPU', "False")

    if not cluster_train:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")))
    else:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_INIT_PORT", "7164")),
            ports_num=int(os.getenv("PADDLE_INIT_PORTS_NUM", "1")),
            ports_num_for_sparse=int(
                os.getenv("PADDLE_INIT_PORTS_NUM_FOR_SPARSE", "1")),
            num_gradient_servers=int(
                os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_INIT_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_INIT_PSERVERS", "127.0.0.1"))
    # Binary mode plus `with`: the handle is closed even if unpickling
    # fails. NOTE: pickle.load can execute arbitrary code, so only load a
    # dictionary file from a trusted source.
    with open("thirdparty/wuyi_train_thdpty/word_dict.pickle", "rb") as fn:
        word_dict = pickle.load(fn)
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        # `trainer` is bound later in main(); the closure resolves it when
        # the handler is actually invoked during training.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        cluster_reader_cluster(cluster_test_file, node_id), 32))
                # Parenthesised single-argument form: prints the same text
                # under Python 2 and is also valid Python 3 syntax.
                print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics))

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(cluster_reader_cluster(cluster_train_file, node_id), 32),
        num_passes=30,
        event_handler=event_handler)
120+
121+
122+
if __name__ == '__main__':
123+
main()
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import paddle.v2 as paddle
2+
import tarfile
3+
import os
4+
import pickle
5+
6+
SPLIT_COUNT = 3
7+
N = 5
8+
9+
10+
def file_len(fd):
    """Return the number of lines (items) produced by iterating ``fd``.

    Consumes the iterable. An empty input yields 0; the previous loop-based
    version raised UnboundLocalError in that case because its loop variable
    was never bound.
    """
    return sum(1 for _ in fd)
14+
15+
16+
def split_from_reader_by_line(filename, reader, split_count):
    """Dump a reader to ``filename`` as CSV lines and split it into shards.

    Each record yielded by ``reader()`` is written as one comma-separated
    line. The file is then cut with the external ``split`` command into
    pieces named ``<filename>-00000``, ``<filename>-00001``, ... of
    ``total_lines // split_count + 1`` lines each.

    Args:
        filename: path of the combined output file and prefix of the shards.
        reader: zero-argument callable returning an iterable of records,
            each record an iterable of values convertible with ``str``.
        split_count: number of shards to aim for.

    Raises:
        subprocess.CalledProcessError: if ``split`` exits with a non-zero
            status (the earlier os.system call ignored such failures).
    """
    import subprocess

    # Count lines while writing instead of re-reading the file afterwards;
    # `with` guarantees the handle is closed even if the reader raises.
    total_line_count = 0
    with open(filename, "w") as out:
        for record in reader():
            out.write(",".join(str(field) for field in record))
            out.write("\n")
            total_line_count += 1

    # Floor division keeps the shard size an integer on Python 2 and 3.
    per_file_lines = total_line_count // split_count + 1
    # Argument list instead of a shell string: no quoting problems with
    # unusual file names, and a failing `split` is reported.
    subprocess.check_call([
        "split", "-d", "-a", "5", "-l", str(per_file_lines), filename,
        filename + "-"
    ])
30+
31+
32+
# Build the imikolov word dictionary and store it next to the data shards.
word_dict = paddle.dataset.imikolov.build_dict()
# Pickle data is a byte stream, so the file is opened in binary mode.
with open("word_dict.pickle", "wb") as dict_f:
    pickle.dump(word_dict, dict_f)

# Write the N-gram training and test samples and split each into shards.
split_from_reader_by_line("train.txt",
                          paddle.dataset.imikolov.train(word_dict, N),
                          SPLIT_COUNT)
split_from_reader_by_line("test.txt",
                          paddle.dataset.imikolov.test(word_dict, N),
                          SPLIT_COUNT)

paddle/parameter/FirstOrderOptimizer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ class AdamParameterOptimizer : public ParameterOptimizer {
265265
addParameterType(PARAMETER_SECOND_MOMENTUM);
266266
}
267267

268+
// Sets learningRate_ to calcLearningRate(numSamplesProcessed, pass_).
// NOTE(review): presumably called once before each batch so that a
// learning-rate schedule takes effect for this optimizer -- confirm
// against the ParameterOptimizer base class.
virtual void startBatch(int64_t numSamplesProcessed) {
269+
learningRate_ = calcLearningRate(numSamplesProcessed, pass_);
270+
}
271+
268272
virtual void finishBatch() { ++step_; }
269273

270274
virtual void update(const VectorPtr vecs[],
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
HOSTS = [
16+
17+
18+
19+
]
20+
'''
21+
workspace configuration
22+
'''
23+
#root dir for the workspace; can be set to any directory that belongs to a real user account
24+
ROOT_DIR = "/root"
25+
'''
26+
network configuration
27+
'''
28+
#pserver nics
29+
PADDLE_NIC = "eth0"
30+
#pserver port
31+
PADDLE_PORT = 7164
32+
#pserver ports num
33+
PADDLE_PORTS_NUM = 1
34+
#pserver sparse ports num
35+
PADDLE_PORTS_NUM_FOR_SPARSE = 1
36+
#whether the trainers use GPU
37+
PADDLE_USE_GPU = "False"
38+
#environments setting for all processes in cluster job
39+
LD_LIBRARY_PATH = "/usr/local/cuda/lib64:/usr/lib64"

0 commit comments

Comments
 (0)