This repository was archived by the owner on May 24, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 147
Expand file tree
/
Copy pathconfig.proto
More file actions
167 lines (110 loc) · 5.28 KB
/
config.proto
File metadata and controls
167 lines (110 loc) · 5.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// @file config.proto
// @brief configuration for factorization machine
// The definitions in this file use `optional` labels and `[default = ...]`
// options, which are proto2 features, so the syntax is declared explicitly
// instead of relying on the compiler's implicit fallback.
syntax = "proto2";
package dmlc.difacto;
message Config {
  // ==== basic settings: input & output ====

  /// Training data; either a directory or a wildcard filename.
  optional string train_data = 1;

  /// Validation (or test) data; either a directory or a wildcard filename.
  optional string val_data = 2;

  /// Format of the data files. Supports libsvm, crb, criteo, adfea, ...
  optional string data_format = 4 [default = "libsvm"];

  /// Filename the model is written to.
  optional string model_out = 5;

  /// Filename the model is read from.
  optional string model_in = 7;

  /// Filename for the prediction output. When it is specified the job runs
  /// prediction; otherwise it runs training.
  optional string predict_out = 9;

  // ==== basic settings: objective and optimization ====

  // ---- w ----

  /// l1 regularizer for :math:`w`: :math:`\lambda_1 |w|_1`
  optional float lambda_l1 = 12 [default = 1.0];

  /// l2 regularizer for :math:`w`: :math:`\lambda_2 \|w\|_2^2`
  optional float lambda_l2 = 13 [default = 0.0];

  /// Learning rate :math:`\eta` (or :math:`\alpha`) for :math:`w`.
  optional float lr_eta = 14 [default = 0.01];

  // ---- V ----

  /// Settings of one embedding :math:`V`.
  message Embedding {
    // -- model --

    /// The embedding dimension :math:`k`.
    optional int32 dim = 1;

    /// Features whose occurrence is below this threshold get no embedding
    /// (:math:`k=0`).
    optional int32 threshold = 2;

    /// l2 regularizer for :math:`V`: :math:`\lambda_2 \|V_i\|_2^2`
    optional float lambda_l2 = 3;

    // -- learning --

    /// Learning rate :math:`\eta` for :math:`V`. If not specified, the one of
    /// :math:`w` is shared.
    optional float lr_eta = 4 [default = 0.01];

    /// :math:`V` is initialized with uniformly random weights drawn from
    /// [-init_scale, +init_scale].
    optional float init_scale = 6 [default = 0.01];

    // -- advanced --

    /// Learning rate :math:`\beta` for :math:`V`.
    optional float lr_beta = 5 [default = 1.0];

    /// Apply dropout on the gradient of :math:`V`. Not applied by default.
    optional float dropout = 7 [default = 0.0];

    /// Project the gradient of :math:`V` into :math:`[-c c]`. Not applied by
    /// default.
    optional float grad_clipping = 8 [default = 0.0];

    /// Normalize the l2-norm of the gradient of :math:`V`. Not applied by
    /// default.
    optional float grad_normalization = 9 [default = 0.0];
  }

  /// The embedding :math:`V`.
  repeated Embedding embedding = 15;

  // ---- learning ----

  /// Size of a minibatch. The smaller it is, the faster the convergence but
  /// the slower the system performance.
  optional int32 minibatch = 22 [default = 1000];

  /// Maximal number of passes over the data.
  optional int32 max_data_pass = 23 [default = 10];

  /// Stop earlier based on the validation.
  optional bool early_stop = 24 [default = false];

  // ==== advanced ====

  // ---- data ----

  /// Save the model every k data passes. The default, -1, only saves after the
  /// last iteration.
  optional int32 save_iter = 90 [default = -1];

  /// Load the model of the k-th iteration. The default, -1, loads the model of
  /// the last iteration.
  optional int32 load_iter = 91 [default = -1];

  /// Give a worker a piece of data only if the worker can access it. Often
  /// used when the data has been dispatched to the workers' local filesystems.
  optional bool local_data = 101 [default = false];

  /// Virtually partition each file into n parts for better load balance.
  /// Default is 10.
  optional int32 num_parts_per_file = 102 [default = 10];

  /// Randomly shuffle the data for minibatch SGD: a minibatch is randomly
  /// picked from rand_shuffle * minibatch examples. Default is 10.
  optional int32 rand_shuffle = 103 [default = 10];

  /// Down-sample the negative examples in the training data. Not applied by
  /// default.
  optional float neg_sampling = 104 [default = 1.0];

  /// If true, output a probability prediction; otherwise output
  /// :math:`\langle x, y \rangle`.
  optional bool prob_predict = 105 [default = true];

  // ---- learning ----

  /// Print the progress every n seconds during training. 1 second by default.
  optional float print_sec = 111 [default = 1.0];

  /// Learning rate :math:`\beta`. 1 by default.
  optional float lr_beta = 112 [default = 1.0];

  /// Minimal objective decrease used by early stop.
  optional float min_objv_decr = 119 [default = 0.00001];

  /// Maximal allowed objective.
  optional float max_objv = 118;

  /// Whether or not to use the constraint :math:`V_i = 0` if :math:`w_i = 0`.
  /// Used by default.
  optional bool l1_shrk = 114 [default = true];

  // Disabled field, kept for reference:
  //   optional bool grad_normalization = 115 [default = false];

  // ---- system performance ----

  /// Number of threads used within a worker and within a server.
  optional int32 num_threads = 121 [default = 2];

  /// Maximal number of minibatches processed concurrently for sgd, and maximal
  /// number of concurrent blocks for block CD. 2 by default.
  optional int32 max_concurrency = 122 [default = 2];

  /// Cache the key list on both the sender and the receiver to reduce the
  /// communication cost. It may increase the memory usage.
  optional bool key_cache = 123 [default = true];

  /// Compress the messages to reduce the communication cost. It may increase
  /// the computation cost.
  optional bool msg_compression = 124 [default = true];

  /// Convert floating-point values into fixed-point integers of n bytes. n can
  /// be 1, 2 and 3; 0 means no compression.
  optional int32 fixed_bytes = 125 [default = 0];
}