Skip to content

Commit a9da6ea

Browse files
author
Yibing Liu
authored
init black/white lists (#17847) (#18309)
test=release/1.5
1 parent 0fad63a commit a9da6ea

File tree

3 files changed

+414
-1
lines changed

3 files changed

+414
-1
lines changed

python/paddle/fluid/contrib/mixed_precision/decorator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ... import unique_name
1919
from . import fp16_utils
2020
from .fp16_utils import create_master_params_grads, master_param_to_train_param
21-
from .fp16_utils import update_loss_scaling
21+
from .fp16_utils import update_loss_scaling, rewrite_program
2222

2323
__all__ = ["decorate"]
2424

@@ -120,6 +120,7 @@ def backward(self,
120120
A list of (param, grad), which is a tuple of a parameter and its
121121
gradient respectively, and the scaled loss.
122122
"""
123+
rewrite_program(self._train_program)
123124
scaled_loss = loss * self._loss_scaling
124125
self._param_grads = self._optimizer.backward(
125126
scaled_loss, startup_program, parameter_list, no_grad_set,
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# The three sets listed below are changed dynamically. They don't contain all
16+
# paddle ops currently.
17+
18+
# The set of ops that support fp16 calculation and are considered numerically-
19+
# safe and performance-critical. These ops are always converted to fp16.
20+
white_list = {
21+
'conv2d',
22+
'matmul',
23+
'mul',
24+
}
25+
26+
# The set of ops that support fp16 calculation and are considered numerically-
27+
# dangerous and whose effects may also be observed in downstream ops.
28+
black_list = {
29+
'exp',
30+
'square',
31+
'log',
32+
'mean',
33+
'sum',
34+
'cos_sim',
35+
'softmax',
36+
'softmax_with_cross_entropy',
37+
'sigmoid_cross_entropy_with_logits',
38+
'cross_entropy',
39+
'cross_entropy2',
40+
}
41+
42+
# This set contains two types of ops. All ops support fp16 calculation. One
43+
# of two types is considered numerically-safe, but may be made unsafe by an
44+
# upstream blacklist op. The other type does not have numerically-significant
45+
# effects, like stack, flatten2.
46+
gray_list = {
47+
'elementwise_add',
48+
'elementwise_sub',
49+
'elementwise_mul',
50+
'elementwise_div',
51+
'elementwise_max',
52+
'elementwise_min',
53+
'elementwise_pow',
54+
'elementwise_mod',
55+
'elementwise_floordiv',
56+
'tanh',
57+
'sigmoid',
58+
'lookup_table',
59+
'top_k',
60+
'pool2d',
61+
'pool3d',
62+
'dropout',
63+
'relu',
64+
'relu6',
65+
'leaky_relu',
66+
'soft_relu',
67+
'flatten2',
68+
'stack',
69+
'unstack',
70+
'uniform_random_batch_size_like',
71+
'gaussian_random',
72+
'gaussian_random_batch_size_like',
73+
'slice',
74+
'rank',
75+
'scale',
76+
'transpose2',
77+
'reshape2',
78+
'gather',
79+
'fill_constant',
80+
'get_tensor_from_selected_rows',
81+
'sign',
82+
'cast',
83+
}
84+
'''
85+
# The set of ops that don't support fp16 calculation
86+
unsupported_fp16_list = {
87+
# from python/paddle/fluid/layers/io.py
88+
'send',
89+
'send_barrier',
90+
'recv',
91+
'fetch_barrier',
92+
'create_recordio_file_reader',
93+
'create_random_data_generator',
94+
'create_py_reader',
95+
'create_shuffle_reader',
96+
'create_batch_reader',
97+
'create_double_buffer_reader',
98+
'create_multi_pass_reader',
99+
'read',
100+
'load',
101+
102+
# from python/paddle/fluid/control_flow.py
103+
'increment',
104+
'less_than',
105+
'less_equal',
106+
'greater_than',
107+
'greater_equal',
108+
'equal',
109+
'not_equal',
110+
'read_from_array',
111+
'shrink_rnn_memory',
112+
'lod_array_length',
113+
'logical_and',
114+
'logical_or',
115+
'logical_xor',
116+
'logical_not',
117+
'print',
118+
'conditional_block',
119+
'while',
120+
'ifelse',
121+
'is_empty',
122+
123+
'lstm',
124+
'cudnn_lstm',
125+
'lstmp',
126+
'gru',
127+
'gru_unit',
128+
'linear_chain_crf',
129+
'crf_decoding',
130+
'bpr_loss',
131+
'chunk_eval',
132+
'sequence_conv',
133+
'sequence_softmax',
134+
# Depthwise conv2d isn't fast and safe currently.
135+
# ref: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h#L79
136+
'depthwise_conv2d',
137+
# Tensor Core kernels are not available for 3D convolutions currently.
138+
'conv3d',
139+
'sequence_pool',
140+
'sequence_concat',
141+
'sequence_slice',
142+
'data_norm',
143+
'layer_norm',
144+
'group_norm',
145+
'spectral_norm',
146+
'depthwise_conv2d_transpose',
147+
'sequence_expand',
148+
'conv_transposed2d',
149+
'conv_transposed3d',
150+
'sequence_expand_as',
151+
'sequence_pad',
152+
'sequence_unpad',
153+
'sequence_erase',
154+
'beam_search',
155+
'beam_search_decode',
156+
'lstm_unit',
157+
'reduce_sum',
158+
'reduce_mean',
159+
'reduce_max',
160+
'reduce_min',
161+
'reduce_prod',
162+
'reduce_all',
163+
'reduce_any',
164+
'split',
165+
'edit_distance',
166+
'ctc_align',
167+
'warpctc',
168+
'sequence_reshape',
169+
'nce',
170+
'hierarchical_sigmoid',
171+
'im2sequence',
172+
'row_conv',
173+
'multiplex',
174+
'sample_logits',
175+
'one_hot',
176+
'smooth_l1_loss',
177+
'squeeze2',
178+
'unsqueeze2',
179+
'lod_reset',
180+
'lrn',
181+
'pad',
182+
'pad_constant_like',
183+
'label_smooth',
184+
'scatter',
185+
'sequence_scatter',
186+
'random_crop',
187+
'mean_iou',
188+
'selu',
189+
'crop',
190+
'affine_grid',
191+
'rank_loss',
192+
'margin_rank_loss',
193+
'pad2d',
194+
'elu',
195+
'pow',
196+
'stanh',
197+
'hard_sigmoid',
198+
'swish',
199+
'prelu',
200+
'brelu',
201+
'sequence_enumerate',
202+
'sequence_mask',
203+
'expand',
204+
'sampling_id',
205+
'maxout',
206+
'space_to_depth',
207+
'sequence_reverse',
208+
'similarity_focus',
209+
'hash',
210+
'grid_sampler',
211+
'log_loss',
212+
'teacher_student_sigmoid_loss',
213+
'add_position_encoding',
214+
'bilinear_tensor_product',
215+
'shuffle_channel',
216+
'temporal_shift',
217+
'psroi_pool',
218+
'huber_loss',
219+
'kldiv_loss',
220+
'tree_conv',
221+
'pixel_shuffle',
222+
'fsp',
223+
'cvm',
224+
225+
'affine_channel',
226+
'roi_pool',
227+
'roi_align',
228+
'anchor_generator',
229+
'generate_proposals',
230+
'generate_proposal_labels',
231+
'generate_mask_labels',
232+
233+
}
234+
'''

0 commit comments

Comments
 (0)