16
16
17
17
import os
18
18
import pickle
19
- import time
19
+ import shutil
20
20
import subprocess
21
21
from .parse_ops import get_key_from_op
22
22
from .extract_features import get_data_from_tables , get_features_from_paramkey
@@ -71,15 +71,16 @@ def __init__(self, table_file='SD710'):
71
71
self .hardware = None
72
72
self .threads = None
73
73
self .predictor_state = False
74
+ self .predictor = {}
74
75
self ._initial_table ()
75
76
76
77
def _initial_table (self ):
77
78
if self .table_file in ['SD625' , 'SD710' , 'SD845' , 'SD865' ]:
78
79
self .hardware = self .table_file
79
- if self .hardware in ['SD625' , 'SD710' ]:
80
- self .predictor_state = True
81
80
self .threads = 4
82
81
self .table_file = f'{ self .hardware } _threads_4_power_mode_0.pkl'
82
+ if self .hardware in ['SD625' , 'SD710' ]:
83
+ self .predictor_state = True
83
84
if not os .path .exists (self .table_file ):
84
85
subprocess .call (
85
86
f'wget https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/{ self .table_file } ' ,
@@ -115,6 +116,19 @@ def _get_input_shape(self, graph):
115
116
break
116
117
return in_shape
117
118
119
def _preload_predictor(self, data_type='fp32'):
    """Load every supported operator predictor once and cache it on the instance.

    Each operator type in the fixed list below is loaded through
    ``load_predictor`` and stored in ``self.predictor`` so later lookups
    do not have to load it again.

    Cache keys:
        * operator types containing ``'conv2d'`` (``conv2d`` and
          ``depthwise_conv2d``) are stored as ``'<op_type>_<data_type>'``;
        * every other operator type is stored under its bare name, so a
          later call with a different ``data_type`` overwrites those entries.

    Args:
        data_type(str): Data type forwarded to ``load_predictor`` and used
            as the key suffix for the conv2d-family entries. Default : fp32
    """
    # Predictor directory: table file name up to its first '.', plus a fixed suffix.
    predictor_dir = self.table_file.split('.')[0] + '_batchsize_1'
    supported_ops = (
        'depthwise_conv2d',
        'conv2d',
        'pool2d',
        'matmul',
        'elementwise_add',
        'elementwise_mul',
        'concat',
        'calib',
        'swish',
    )
    for name in supported_ops:
        loaded = load_predictor(name, predictor_dir, data_type)
        # Only the conv2d family is keyed per data type.
        cache_key = f'{name}_{data_type}' if 'conv2d' in name else name
        self.predictor[cache_key] = loaded
131
+
118
132
def predict (self ,
119
133
model_file ,
120
134
param_file ,
@@ -125,22 +139,27 @@ def predict(self,
125
139
126
140
Args:
127
141
model_file(str), param_file(str): The inference model(*.pdmodel, *.pdiparams).
128
- data_type(str): Data type, fp32 or int8. Default : fp32
142
+ data_type(str): Data type, fp32, fp16 or int8.
129
143
threads(int): threads num
130
144
input_shape(list): Generally, the input shape is confirmed when saving the inference model and the parameter is only effective for input shape that has variable length.
131
145
Returns:
132
146
latency(float): The latency of the model.
133
147
"""
134
- assert data_type in ['fp32' , 'int8'
135
- ], f'data_type must be one of [fp32, int8]'
148
+ assert data_type in ['fp32' , 'int8' , 'fp16'
149
+ ], f'data_type must be one of [fp32, int8, fp16 ]'
136
150
137
151
if self .hardware and self .threads != threads :
138
152
self ._change_table (threads )
139
153
154
+ if self .predictor_state and f'conv2d_{ data_type } ' not in self .predictor :
155
+ self ._preload_predictor (data_type )
156
+
157
+ enable_fp16 = True if data_type == 'fp16' else False
140
158
pbmodel_file = opt_model (
141
159
model_file = model_file ,
142
160
param_file = param_file ,
143
- optimize_out_type = 'protobuf' , )
161
+ optimize_out_type = 'protobuf' ,
162
+ enable_fp16 = enable_fp16 )
144
163
145
164
paddle .enable_static ()
146
165
with open (pbmodel_file , "rb" ) as f :
@@ -176,7 +195,7 @@ def predict(self,
176
195
warnings .warn ("OperatorType\t CalledTimes" )
177
196
for key in new_op :
178
197
warnings .warn (f"{ key .ljust (15 )} \t { new_op [key ]} " )
179
-
198
+ shutil . rmtree ( os . path . dirname ( pbmodel_file ))
180
199
return latency
181
200
182
201
def op_predictor (self , op_type , param_key , data_type ):
@@ -185,18 +204,20 @@ def op_predictor(self, op_type, param_key, data_type):
185
204
Args:
186
205
op_type: The operator's type
187
206
param_key: The operator's parameter information.
188
- data_type: Data type, fp32 or int8. Default : int8
207
+ data_type: Data type, fp32 or int8.
189
208
Returns:
190
209
latency(float): The latency of the operator.
191
210
"""
192
211
193
212
latency = 0.0
194
- op_dir = self .table_file .split ('.' )[0 ] + '_batchsize_1'
195
213
if op_type in [
196
214
'depthwise_conv2d' , 'conv2d' , 'pool2d' , 'matmul' ,
197
215
'elementwise_add' , 'elementwise_mul' , 'concat' , 'calib' , 'swish'
198
216
]:
199
- predictor = load_predictor (op_type , op_dir , data_type )
217
+ key = op_type
218
+ if 'conv2d' in op_type :
219
+ key = f'{ op_type } _{ data_type } '
220
+ predictor = self .predictor [key ]
200
221
features = get_features_from_paramkey (param_key , op_type , data_type )
201
222
latency = predictor .predict ([features ])
202
223
else :
0 commit comments