@@ -156,16 +156,54 @@ def create_predictor(cls, args):
                use_calib_mode=False)
            print("Enable TensorRT is: {}".format(
                config.tensorrt_engine_enabled()))
-           if args.collect_shape:
-               config.collect_shape_range_info(
-                   os.path.join(
-                       os.path.dirname(args.model_path), args.task_name +
-                       '_shape_range_info.pbtxt'))
+
+           # Set min/max/opt tensor shape of each trt subgraph input.
+           if args.int8:
+               min_batch_size, max_batch_size, opt_batch_size = 16, 32, 32
+               min_seq_len, max_seq_len, opt_seq_len = 31, 128, 32
+
+               min_input_shape = {
+                   "faster_tokenizer_2.tmp_0": [min_batch_size, min_seq_len],
+                   "faster_tokenizer_2.tmp_1": [min_batch_size, min_seq_len],
+                   "tmp_4": [min_batch_size, min_seq_len],
+                   "unsqueeze2_0.tmp_0": [min_batch_size, 1, 1, min_seq_len],
+               }
+               max_input_shape = {
+                   "faster_tokenizer_2.tmp_0": [max_batch_size, max_seq_len],
+                   "faster_tokenizer_2.tmp_1": [max_batch_size, max_seq_len],
+                   "tmp_4": [max_batch_size, max_seq_len],
+                   "unsqueeze2_0.tmp_0": [max_batch_size, 1, 1, max_seq_len],
+               }
+               opt_input_shape = {
+                   "faster_tokenizer_2.tmp_0": [opt_batch_size, opt_seq_len],
+                   "faster_tokenizer_2.tmp_1": [opt_batch_size, opt_seq_len],
+                   "tmp_4": [opt_batch_size, opt_seq_len],
+                   "unsqueeze2_0.tmp_0": [opt_batch_size, 1, 1, opt_seq_len],
+               }
            else:
-               config.enable_tuned_tensorrt_dynamic_shape(
-                   os.path.join(
-                       os.path.dirname(args.model_path),
-                       args.task_name + "_shape_range_info.pbtxt"), True)
+               min_batch_size, max_batch_size, opt_batch_size = 16, 32, 32
+               min_seq_len, max_seq_len, opt_seq_len = 31, 128, 32
+
+               min_input_shape = {
+                   "faster_tokenizer_1.tmp_0": [min_batch_size, min_seq_len],
+                   "faster_tokenizer_1.tmp_1": [min_batch_size, min_seq_len],
+                   "tmp_4": [min_batch_size, min_seq_len],
+                   "unsqueeze2_0.tmp_0": [min_batch_size, 1, 1, min_seq_len],
+               }
+               max_input_shape = {
+                   "faster_tokenizer_1.tmp_0": [max_batch_size, max_seq_len],
+                   "faster_tokenizer_1.tmp_1": [max_batch_size, max_seq_len],
+                   "tmp_4": [max_batch_size, max_seq_len],
+                   "unsqueeze2_0.tmp_0": [max_batch_size, 1, 1, max_seq_len],
+               }
+               opt_input_shape = {
+                   "faster_tokenizer_1.tmp_0": [opt_batch_size, opt_seq_len],
+                   "faster_tokenizer_1.tmp_1": [opt_batch_size, opt_seq_len],
+                   "tmp_4": [opt_batch_size, opt_seq_len],
+                   "unsqueeze2_0.tmp_0": [opt_batch_size, 1, 1, opt_seq_len],
+               }
+           config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+                                             opt_input_shape)

            predictor = paddle.inference.create_predictor(config)
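
The change replaces the shape-range-file workflow (collect_shape_range_info / enable_tuned_tensorrt_dynamic_shape) with explicit min/max/opt shapes passed to config.set_trt_dynamic_shape_info. For context, below is a minimal, self-contained sketch of how a Paddle Inference predictor with TensorRT and explicit dynamic shapes can be assembled and run; the model files, input tensor name, and shape values are illustrative placeholders and are not taken from this script.

    import numpy as np
    import paddle.inference as paddle_infer

    # Placeholder model files -- substitute the paths of the exported inference model.
    config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
    config.enable_use_gpu(100, 0)  # 100 MB initial GPU memory pool, device 0

    # Run supported subgraphs with TensorRT in FP16.
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=32,
        min_subgraph_size=5,
        precision_mode=paddle_infer.PrecisionType.Half,
        use_static=False,
        use_calib_mode=False)

    # Explicit dynamic-shape ranges for each TRT subgraph input, analogous to
    # the diff above ("input_ids" is a placeholder tensor name).
    batch, seq = 32, 128
    min_input_shape = {"input_ids": [1, 1]}
    max_input_shape = {"input_ids": [batch, seq]}
    opt_input_shape = {"input_ids": [batch, seq]}
    config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                      opt_input_shape)

    predictor = paddle_infer.create_predictor(config)

    # Feed a dummy batch and fetch the first output.
    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
    input_handle.copy_from_cpu(np.ones((batch, seq), dtype="int64"))
    predictor.run()
    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    print(output_handle.copy_to_cpu().shape)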