|
| 1 | +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +# The three sets listed below are changed dynamically. They don't contain all |
| 16 | +# paddle ops currently. |
| 17 | + |
| 18 | +# The set of ops that support fp16 calculation and are considered numerically- |
| 19 | +# safe and performance-critical. These ops are always converted to fp16. |
| 20 | +white_list = { |
| 21 | + 'conv2d', |
| 22 | + 'matmul', |
| 23 | + 'mul', |
| 24 | +} |
| 25 | + |
| 26 | +# The set of ops that support fp16 calculation and are considered numerically- |
| 27 | +# dangerous and whose effects may also be observed in downstream ops. |
| 28 | +black_list = { |
| 29 | + 'exp', |
| 30 | + 'square', |
| 31 | + 'log', |
| 32 | + 'mean', |
| 33 | + 'sum', |
| 34 | + 'cos_sim', |
| 35 | + 'softmax', |
| 36 | + 'softmax_with_cross_entropy', |
| 37 | + 'sigmoid_cross_entropy_with_logits', |
| 38 | + 'cross_entropy', |
| 39 | + 'cross_entropy2', |
| 40 | +} |
| 41 | + |
| 42 | +# This set contains two types of ops, all of which support fp16 calculation. |
| 43 | +# The first type is considered numerically-safe, but may be made unsafe by an |
| 44 | +# upstream blacklist op. The second type does not have numerically-significant |
| 45 | +# effects, like stack, flatten2. |
| 46 | +gray_list = { |
| 47 | + 'elementwise_add', |
| 48 | + 'elementwise_sub', |
| 49 | + 'elementwise_mul', |
| 50 | + 'elementwise_div', |
| 51 | + 'elementwise_max', |
| 52 | + 'elementwise_min', |
| 53 | + 'elementwise_pow', |
| 54 | + 'elementwise_mod', |
| 55 | + 'elementwise_floordiv', |
| 56 | + 'tanh', |
| 57 | + 'sigmoid', |
| 58 | + 'lookup_table', |
| 59 | + 'top_k', |
| 60 | + 'pool2d', |
| 61 | + 'pool3d', |
| 62 | + 'dropout', |
| 63 | + 'relu', |
| 64 | + 'relu6', |
| 65 | + 'leaky_relu', |
| 66 | + 'soft_relu', |
| 67 | + 'flatten2', |
| 68 | + 'stack', |
| 69 | + 'unstack', |
| 70 | + 'uniform_random_batch_size_like', |
| 71 | + 'gaussian_random', |
| 72 | + 'gaussian_random_batch_size_like', |
| 73 | + 'slice', |
| 74 | + 'rank', |
| 75 | + 'scale', |
| 76 | + 'transpose2', |
| 77 | + 'reshape2', |
| 78 | + 'gather', |
| 79 | + 'fill_constant', |
| 80 | + 'get_tensor_from_selected_rows', |
| 81 | + 'sign', |
| 82 | + 'cast', |
| 83 | +} |
| 84 | +''' |
| 85 | +# The set of ops that don't support fp16 calculation |
| 86 | +unsupported_fp16_list = { |
| 87 | + # from python/paddle/fluid/layers/io.py |
| 88 | + 'send', |
| 89 | + 'send_barrier', |
| 90 | + 'recv', |
| 91 | + 'fetch_barrier', |
| 92 | + 'create_recordio_file_reader', |
| 93 | + 'create_random_data_generator', |
| 94 | + 'create_py_reader', |
| 95 | + 'create_shuffle_reader', |
| 96 | + 'create_batch_reader', |
| 97 | + 'create_double_buffer_reader', |
| 98 | + 'create_multi_pass_reader', |
| 99 | + 'read', |
| 100 | + 'load', |
| 101 | + |
| 102 | + # from python/paddle/fluid/control_flow.py |
| 103 | + 'increment', |
| 104 | + 'less_than', |
| 105 | + 'less_equal', |
| 106 | + 'greater_than', |
| 107 | + 'greater_equal', |
| 108 | + 'equal', |
| 109 | + 'not_equal', |
| 110 | + 'read_from_array', |
| 111 | + 'shrink_rnn_memory', |
| 112 | + 'lod_array_length', |
| 113 | + 'logical_and', |
| 114 | + 'logical_or', |
| 115 | + 'logical_xor', |
| 116 | + 'logical_not', |
| 117 | + 'print', |
| 118 | + 'conditional_block', |
| 119 | + 'while', |
| 120 | + 'ifelse', |
| 121 | + 'is_empty', |
| 122 | +
|
| 123 | + 'lstm', |
| 124 | + 'cudnn_lstm', |
| 125 | + 'lstmp', |
| 126 | + 'gru', |
| 127 | + 'gru_unit', |
| 128 | + 'linear_chain_crf', |
| 129 | + 'crf_decoding', |
| 130 | + 'bpr_loss', |
| 131 | + 'chunk_eval', |
| 132 | + 'sequence_conv', |
| 133 | + 'sequence_softmax', |
| 134 | + # Depthwise conv2d isn't fast and safe currently. |
| 135 | + # ref: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h#L79 |
| 136 | + 'depthwise_conv2d', |
| 137 | + # Tensor Core kernels are not available for 3D convolutions currently. |
| 138 | + 'conv3d', |
| 139 | + 'sequence_pool', |
| 140 | + 'sequence_concat', |
| 141 | + 'sequence_slice', |
| 142 | + 'data_norm', |
| 143 | + 'layer_norm', |
| 144 | + 'group_norm', |
| 145 | + 'spectral_norm', |
| 146 | + 'depthwise_conv2d_transpose', |
| 147 | + 'sequence_expand', |
| 148 | + 'conv_transposed2d', |
| 149 | + 'conv_transposed3d', |
| 150 | + 'sequence_expand_as', |
| 151 | + 'sequence_pad', |
| 152 | + 'sequence_unpad', |
| 153 | + 'sequence_erase', |
| 154 | + 'beam_search', |
| 155 | + 'beam_search_decode', |
| 156 | + 'lstm_unit', |
| 157 | + 'reduce_sum', |
| 158 | + 'reduce_mean', |
| 159 | + 'reduce_max', |
| 160 | + 'reduce_min', |
| 161 | + 'reduce_prod', |
| 162 | + 'reduce_all', |
| 163 | + 'reduce_any', |
| 164 | + 'split', |
| 165 | + 'edit_distance', |
| 166 | + 'ctc_align', |
| 167 | + 'warpctc', |
| 168 | + 'sequence_reshape', |
| 169 | + 'nce', |
| 170 | + 'hierarchical_sigmoid', |
| 171 | + 'im2sequence', |
| 172 | + 'row_conv', |
| 173 | + 'multiplex', |
| 174 | + 'sample_logits', |
| 175 | + 'one_hot', |
| 176 | + 'smooth_l1_loss', |
| 177 | + 'squeeze2', |
| 178 | + 'unsqueeze2', |
| 179 | + 'lod_reset', |
| 180 | + 'lrn', |
| 181 | + 'pad', |
| 182 | + 'pad_constant_like', |
| 183 | + 'label_smooth', |
| 184 | + 'scatter', |
| 185 | + 'sequence_scatter', |
| 186 | + 'random_crop', |
| 187 | + 'mean_iou', |
| 188 | + 'selu', |
| 189 | + 'crop', |
| 190 | + 'affine_grid', |
| 191 | + 'rank_loss', |
| 192 | + 'margin_rank_loss', |
| 193 | + 'pad2d', |
| 194 | + 'elu', |
| 195 | + 'pow', |
| 196 | + 'stanh', |
| 197 | + 'hard_sigmoid', |
| 198 | + 'swish', |
| 199 | + 'prelu', |
| 200 | + 'brelu', |
| 201 | + 'sequence_enumerate', |
| 202 | + 'sequence_mask', |
| 203 | + 'expand', |
| 204 | + 'sampling_id', |
| 205 | + 'maxout', |
| 206 | + 'space_to_depth', |
| 207 | + 'sequence_reverse', |
| 208 | + 'similarity_focus', |
| 209 | + 'hash', |
| 210 | + 'grid_sampler', |
| 211 | + 'log_loss', |
| 212 | + 'teacher_student_sigmoid_loss', |
| 213 | + 'add_position_encoding', |
| 214 | + 'bilinear_tensor_product', |
| 215 | + 'shuffle_channel', |
| 216 | + 'temporal_shift', |
| 217 | + 'psroi_pool', |
| 218 | + 'huber_loss', |
| 219 | + 'kldiv_loss', |
| 220 | + 'tree_conv', |
| 221 | + 'pixel_shuffle', |
| 222 | + 'fsp', |
| 223 | + 'cvm', |
| 224 | +
|
| 225 | + 'affine_channel', |
| 226 | + 'roi_pool', |
| 227 | + 'roi_align', |
| 228 | + 'anchor_generator', |
| 229 | + 'generate_proposals', |
| 230 | + 'generate_proposal_labels', |
| 231 | + 'generate_mask_labels', |
| 232 | + |
| 233 | +} |
| 234 | +''' |
0 commit comments