     'GradientClipByValue',
     'GradientClipByNorm',
     'GradientClipByGlobalNorm',
-    'append_gradient_clip_ops',
-    'error_clip_callback',
 ]


@@ -38,6 +36,25 @@ def append_clip_op(self, block, grad_name):


 class ErrorClipByValue(BaseErrorClipAttr):
+    """
+    Clips tensor values to the range [min, max].
+
+    Given a tensor t, this operation clips its values into the range [min, max] in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+            it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
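The [min, max] semantics described in the new ErrorClipByValue docstring can be pictured with a tiny standalone sketch. This is illustration only, not part of the diff, and numpy is assumed purely for the demo.

# Illustration only: elementwise clipping to [min, max], with min defaulting
# to -max as the docstring above describes. numpy is an assumption for the demo.
import numpy as np

t = np.array([-7.0, -0.5, 0.0, 3.0, 9.0])
max_v, min_v = 5.0, None
if min_v is None:                    # mirrors the framework default of min = -max
    min_v = -max_v
clipped = np.clip(t, min_v, max_v)   # values below min become min, values above max become max
print(clipped)                       # [-5.  -0.5  0.   3.   5. ]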
@@ -99,6 +116,31 @@ def create_operators(self, param, grad):


 class GradientClipByValue(BaseGradientClipAttr):
+    """
+    Clips gradient values to the range [min, max].
+
+    Given a tensor t, this operation clips its values into the range [min, max] in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+            it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByValue(min=-1.0, max=1.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
@@ -120,6 +162,37 @@ def create_operators(self, param, grad):


 class GradientClipByNorm(BaseGradientClipAttr):
+    """
+    Clips tensor values to a maximum L2-norm.
+
+    This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
+    If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
+    will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
+    :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
+    :math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
+
+    .. math::
+
+        Out = \\frac{max\_norm * X}{norm(X)},
+
+    where :math:`norm(X)` represents the L2 norm of :math:`X`.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByNorm(clip_norm=2.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+
+    """
+
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

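A minimal sketch of the per-tensor L2-norm formula documented above, for illustration only (not part of the diff, and not the operator implementation); numpy is assumed.

# Illustration only: rescale X when its L2 norm exceeds clip_norm, so that
# Out = clip_norm * X / norm(X); otherwise X is returned unchanged.
import numpy as np

def clip_by_norm(x, clip_norm):
    norm = np.sqrt(np.sum(np.square(x)))
    if norm <= clip_norm:
        return x                      # small enough, leave as is
    return x * (clip_norm / norm)     # now the L2 norm equals clip_norm

g = np.array([3.0, 4.0])              # L2 norm = 5.0
print(clip_by_norm(g, 2.0))           # [1.2 1.6], L2 norm = 2.0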
@@ -135,6 +208,44 @@ def create_operators(self, param, grad):


 class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    """
+    Clips values of multiple tensors by the ratio of the sum of their norms.
+
+    Given a list of tensors t_list, and a clipping threshold clip_norm, this
+    operation returns a list of clipped tensors list_clipped and the global
+    norm (global_norm) of all tensors in t_list.
+
+    To perform the clipping, the values :math:`t\_list[i]` are set to:
+
+    .. math::
+
+        t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
+
+    where:
+
+    .. math::
+
+        global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
+
+    If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
+    otherwise they are all shrunk by the global ratio.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+        group_name (str, optional): The group name for this clip.
+
+    Examples:
+        .. code-block:: python
+
+            p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
+
+            with fluid.program_guard(main_program=prog_clip):
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+            p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
+
+    """
+
     def __init__(self, clip_norm, group_name="default_group"):
         if not isinstance(group_name, basestring):
             raise TypeError("'group_name' must be a basestring.")
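The global-norm formula above can be sketched in a few lines as well; again this is illustration only, assumes numpy, and is not the operator implementation.

# Illustration only: scale every tensor by clip_norm / max(global_norm, clip_norm),
# where global_norm is the L2 norm of the concatenation of all tensors.
import numpy as np

def clip_by_global_norm(t_list, clip_norm):
    global_norm = np.sqrt(sum(np.sum(np.square(t)) for t in t_list))
    scale = clip_norm / max(global_norm, clip_norm)   # <= 1.0, shared by all tensors
    return [t * scale for t in t_list], global_norm

grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # per-tensor norms 5 and 12
clipped, g_norm = clip_by_global_norm(grads, clip_norm=2.0)
print(g_norm)                                          # 13.0 = sqrt(5**2 + 12**2)
print([c.tolist() for c in clipped])                   # every entry scaled by 2/13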
@@ -183,15 +294,16 @@ def create_operators(self, param, grad):

 def set_gradient_clip(clip, param_list=None, program=None):
     """
-    To specify parameters that require gradient clip.
-    Args:
-        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
-                which describes the type and detailed attributes of required gradient clip.
-        param_list(list, None by default): Parameters that require gradient clip.
-                It can be a list of parameter or a list of parameter's name.
-                When it's None, all parameters in the program will be included.
-        program(Program, None by default): The program where parameters are.
-                Will be the default main program when assigned with None.
+    To specify parameters that require gradient clip.
+
+    Args:
+        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+                which describes the type and detailed attributes of the required gradient clip.
+        param_list(list(Variable), optional): Parameters that require gradient clip.
+                It can be a list of parameters or a list of parameter names.
+                When it is None, all parameters in the program will be included.
+        program(Program, optional): The program where the parameters are located.
+                Defaults to the default main program when set to None.
     """
     if not isinstance(clip, BaseGradientClipAttr):
         raise TypeError(
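As a usage note for the signature documented in this hunk, here is a hedged sketch (not from the diff) of selecting one clip strategy for a whole program; it assumes the paddle.fluid import style already used in the docstring examples above.

# Hedged usage sketch: apply value clipping to all parameters of the default
# main program, following the set_gradient_clip signature documented above.
import paddle.fluid as fluid

fluid.clip.set_gradient_clip(
    fluid.clip.GradientClipByValue(max=1.0, min=-1.0),
    param_list=None,   # None means every parameter in the program
    program=None)      # None means the default main program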