Skip to content
This repository was archived by the owner on Feb 3, 2025. It is now read-only.

Commit ea8f038

Browse files
author
DEKHTIARJonathan
committed
Runner / Args Refactoring and Improvements
1 parent 9e79a80 commit ea8f038

File tree

6 files changed

+191
-63
lines changed

6 files changed

+191
-63
lines changed

tftrt/examples/benchmark_args.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,18 @@ def __init__(self):
7272
default_sign_key
7373
))
7474

75+
self._parser.add_argument('--output_tensor_names', type=str,
76+
default=None,
77+
help='Output tensors\' name, defaults to all '
78+
'tensors available if not set. Will '
79+
'only work with `--use_tftrt`.')
80+
81+
self._parser.add_argument('--output_tensor_indices', type=str,
82+
default=None,
83+
help='Output tensors\' index, defaults to '
84+
'all tensors available if not set. Will '
85+
'only work without `--use_tftrt`.')
86+
7587
self._parser.add_argument('--num_iterations', type=int, default=None,
7688
help='How many iterations(batches) to '
7789
'evaluate. If not supplied, the whole '
@@ -157,6 +169,15 @@ def __init__(self):
157169
help='Whether to use implicit batch mode or dynamic shape mode.'
158170
)
159171

172+
# =========================== DEBUG Flags ========================== #
173+
174+
self._add_bool_argument(
175+
name="debug",
176+
default=False,
177+
required=False,
178+
help='If set to True, will print additional information.'
179+
)
180+
160181
def _add_bool_argument(self, name=None, default=False, required=False, help=None):
161182
if not isinstance(default, bool):
162183
raise ValueError()
@@ -205,7 +226,7 @@ def _validate_args(self, args):
205226
else:
206227
if args.use_xla:
207228
raise ValueError("--use_xla flag is not supported with TF-TRT.")
208-
229+
209230
if args.precision not in self.ALLOWED_TFTRT_PRECISION_MODES:
210231
raise ValueError("The received --precision={} is not supported."
211232
" Allowed: {}".format(
@@ -250,5 +271,6 @@ def parse_args(self):
250271

251272
print('\nBenchmark arguments:')
252273
_print_dict(vars(args))
274+
print()
253275

254276
return args

tftrt/examples/benchmark_runner.py

Lines changed: 120 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
import logging
1010
import time
1111

12+
from collections import defaultdict
1213
from contextlib import contextmanager
1314
from functools import partial
15+
from operator import itemgetter
1416

1517
import numpy as np
1618
import tensorflow as tf
@@ -64,11 +66,11 @@ def before_benchmark(self, **kwargs):
6466
pass
6567

6668
@abc.abstractmethod
67-
def compute_accuracy_metric(self, batch_size, steps_executed, **kwargs):
69+
def compute_accuracy_metric(self, predictions, expected, **kwargs):
6870
raise NotImplementedError()
6971

7072
@abc.abstractmethod
71-
def process_model_output(self, outputs, batch_y, **kwargs):
73+
def process_model_output(self, outputs, **kwargs):
7274
raise NotImplementedError()
7375

7476
############################################################################
@@ -81,21 +83,26 @@ def __init__(
8183
output_saved_model_dir,
8284
allow_build_at_runtime=False,
8385
calibration_input_fn=None,
86+
debug=False,
8487
gpu_mem_cap=None,
8588
input_signature_key=DEFAULT_SERVING_SIGNATURE_DEF_KEY,
8689
max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
8790
minimum_segment_size=5,
8891
num_calib_inputs=None,
8992
optimize_offline=False,
9093
optimize_offline_input_fn=None,
94+
output_tensor_indices=None,
95+
output_tensor_names=None,
9196
precision_mode=None,
9297
use_dynamic_shape=False,
93-
use_tftrt=False
98+
use_tftrt=False,
9499
):
95100

96101
logging.getLogger("tensorflow").setLevel(logging.INFO)
97102
logging.disable(logging.WARNING)
98103

104+
self._debug = debug
105+
99106
# TensorFlow can execute operations synchronously or asynchronously.
100107
# If asynchronous execution is enabled, operations may return
101108
# "non-ready" handles.
@@ -131,15 +138,17 @@ def __init__(
131138
use_tftrt=use_tftrt
132139
)
133140

141+
self._set_output_tensor_name(output_tensor_indices, output_tensor_names)
142+
134143
def _config_gpu_memory(self, gpu_mem_cap):
135144
gpus = tf.config.experimental.list_physical_devices('GPU')
136145

137146
if not gpus:
138147
raise RuntimeError("No GPUs has been found.")
139148

140-
print('Found the following GPUs:')
149+
self.debug_print('Found the following GPUs:')
141150
for gpu in gpus:
142-
print(' ', gpu)
151+
self.debug_print(f"\t- {gpu}")
143152

144153
for gpu in gpus:
145154
try:
@@ -153,6 +162,42 @@ def _config_gpu_memory(self, gpu_mem_cap):
153162
except RuntimeError as e:
154163
print('Can not set GPU memory config', e)
155164

165+
def _set_output_tensor_name(
    self, output_tensor_indices, output_tensor_names
):
    """Select which outputs of the graph function are kept.

    The selection is stored in ``self._output_tensors`` as a list of
    integer positions when ``self._graph_func.structured_outputs`` is a
    list/tuple, or as a list of string keys when it is a dict.

    Args:
        output_tensor_indices: Comma-separated integer positions (for
            example ``"0,2"``) or ``None`` to keep every positional output.
            Only read when the structured outputs are a list/tuple.
        output_tensor_names: Comma-separated output names or ``None`` to
            keep every named output (in sorted key order). Only read when
            the structured outputs are a dict.

    Raises:
        ValueError: If a requested name is not an available output, or a
            requested index is not an integer or is out of range.
        RuntimeError: If the structured outputs are neither a list/tuple
            nor a dict.
    """
    structured_outputs = self._graph_func.structured_outputs

    if isinstance(structured_outputs, (list, tuple)):
        num_outputs = len(structured_outputs)

        if output_tensor_indices is None:
            selected = list(range(num_outputs))
        else:
            selected = [int(i) for i in output_tensor_indices.split(",")]
            # Fail early with a readable message instead of an IndexError
            # raised later from inside the inference step.
            for idx in selected:
                if not -num_outputs <= idx < num_outputs:
                    raise ValueError(
                        f"Unknown output_tensor_indices received: {idx}. "
                        f"Authorized: {list(range(num_outputs))}"
                    )

    elif isinstance(structured_outputs, dict):
        # Sort by key so the default selection order is deterministic.
        structured_outputs = dict(sorted(structured_outputs.items()))

        if output_tensor_names is None:
            selected = list(structured_outputs)
        else:
            # Tolerate whitespace around the separators ("a, b").
            selected = [n.strip() for n in output_tensor_names.split(",")]
            for name in selected:
                if name not in structured_outputs:
                    raise ValueError(
                        f"Unknown output_tensor_names received: {name}. "
                        f"Authorized: {structured_outputs.keys()}"
                    )

    else:
        # Build a single message string: passing the type as a second
        # positional argument would make the exception render as a tuple.
        raise RuntimeError(
            "Unknown structured_outputs format received: "
            f"{type(structured_outputs)}"
        )

    self._output_tensors = selected

    self.debug_print(f"Available Output Tensors: {structured_outputs}")
    self.debug_print(f"Chosen Output Tensor: {self._output_tensors}")
200+
156201
def _get_graph_func(
157202
self,
158203
input_saved_model_dir,
@@ -288,6 +333,10 @@ def _check_input_fn(func, name):
288333

289334
return graph_func
290335

336+
def debug_print(self, msg):
    """Print ``msg`` prefixed with ``[DEBUG]`` when debugging is enabled.

    Nothing is printed when ``self._debug`` is falsy.
    """
    if not self._debug:
        return

    print(f"[DEBUG] {msg}")
339+
291340
def execute_benchmark(
292341
self,
293342
batch_size,
@@ -317,7 +366,22 @@ def execute_benchmark(
317366
@_force_gpu_resync
318367
@tf.function(jit_compile=use_xla)
319368
def infer_step(_batch_x):
320-
return self._graph_func(_batch_x)
369+
output = self._graph_func(_batch_x)
370+
return itemgetter(*self._output_tensors)(output)
371+
372+
predicted_dict = defaultdict(lambda: [])
373+
expected_arr = []
374+
375+
def get_debug_output_shape_str(output):
    """Summarise the shape(s) of a model output for debug printing.

    Returns a list of shapes for a tuple/list, a ``{key: shape}`` dict for
    a dict, and the bare ``.shape`` attribute for anything else.
    """
    if isinstance(output, (tuple, list)):
        return [tensor.shape for tensor in output]

    if isinstance(output, dict):
        return {name: tensor.shape for name, tensor in output.items()}

    return output.shape
384+
321385

322386
print("\nStart inference ...")
323387
for i, data_batch in enumerate(dataset):
@@ -348,19 +412,62 @@ def infer_step(_batch_x):
348412
))
349413

350414
if not skip_accuracy_testing:
351-
self.process_model_output(
352-
outputs=batch_preds,
353-
batch_y=batch_y,
354-
**kwargs
355-
)
415+
if i == 0:
416+
self.debug_print("=========== BEFORE PROCESSING ==========")
417+
debug_batch_preds = get_debug_output_shape_str(batch_preds)
418+
self.debug_print(f"`batch_preds`: {debug_batch_preds}")
419+
if batch_y is not None:
420+
self.debug_print(f"`batch_y` shape: {batch_y.shape}")
421+
422+
batch_preds = self.process_model_output(batch_preds, **kwargs)
423+
424+
if not isinstance(batch_preds, dict):
425+
raise ValueError(
426+
f"`self.process_model_output` did not return a dict. " \
427+
f"Received: {type(batch_preds)}"
428+
)
429+
430+
if batch_y is not None:
431+
batch_y = batch_y.numpy()
432+
if batch_y.shape[-1] == 1:
433+
batch_y = np.squeeze(batch_y, axis=-1)
434+
435+
if i == 0:
436+
self.debug_print("=========== AFTER PROCESSING ===========")
437+
debug_batch_preds = get_debug_output_shape_str(batch_preds)
438+
self.debug_print(f"`batch_preds`: {debug_batch_preds}")
439+
if batch_y is not None:
440+
self.debug_print(f"`batch_y` shape: {batch_y.shape}")
441+
self.debug_print("========================================")
442+
443+
for key, value in batch_preds.items():
444+
predicted_dict[key].append(value)
445+
446+
if batch_y is not None:
447+
expected_arr.append(batch_y)
356448

357449
if (i + 1) >= num_iterations:
358450
break
359451

360452
if not skip_accuracy_testing:
453+
predicted_dict = {
454+
k: np.concatenate(v, axis=0)
455+
for k, v in predicted_dict.items()
456+
}
457+
if expected_arr:
458+
expected_arr = np.concatenate(expected_arr, axis=0)
459+
else:
460+
expected_arr = np.array(expected_arr)
461+
462+
self.debug_print("=========== BEFORE METRIC COMPUTATION ==========")
463+
debug_predicted_dict = get_debug_output_shape_str(predicted_dict)
464+
self.debug_print(f"`predicted_dict`: {debug_predicted_dict}")
465+
self.debug_print(f"`expected_arr` shape: {expected_arr.shape}")
466+
self.debug_print("========================================")
467+
361468
results['accuracy_metric'] = self.compute_accuracy_metric(
362-
batch_size=batch_size,
363-
steps_executed=steps_executed,
469+
predictions=predicted_dict,
470+
expected=expected_arr,
364471
**kwargs
365472
)
366473

tftrt/examples/image_classification/image_classification.py

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -65,33 +65,16 @@ class BenchmarkRunner(BaseBenchmarkRunner):
6565
ACCURACY_METRIC_NAME = "accuracy"
6666

6767
def before_benchmark(self, **kwargs):
68-
self._adjust = 1 if kwargs["num_classes"] == 1001 else 0
69-
self._corrects = 0
70-
71-
try:
72-
self._output_tensorname = list(
73-
self._graph_func.structured_outputs.keys()
74-
)[0]
75-
except AttributeError:
76-
# Output tensor doesn't have a name, index 0
77-
self._output_tensorname = 0
78-
79-
def compute_accuracy_metric(self, batch_size, steps_executed, **kwargs):
80-
return self._corrects / (batch_size * steps_executed)
81-
82-
def _eval_fn(self, preds, labels, adjust):
83-
"""Measures number of correct predicted labels in a batch.
84-
Assumes preds and labels are numpy arrays.
85-
"""
86-
preds = np.argmax(preds, axis=1).reshape(-1) - adjust
87-
return np.sum((labels.reshape(-1) == preds).astype(np.float32))
88-
89-
def process_model_output(self, outputs, batch_y, **kwargs):
90-
self._corrects += self._eval_fn(
91-
preds=outputs[self._output_tensorname].numpy(),
92-
labels=batch_y.numpy(),
93-
adjust=self._adjust
94-
)
68+
self._labels_shift = 1 if kwargs["num_classes"] == 1001 else 0
69+
70+
def compute_accuracy_metric(self, predictions, expected, **kwargs):
    """Return the fraction of ``predictions["outputs"]`` equal to ``expected``.

    The comparison is element-wise; extra keyword arguments are ignored.
    """
    matches = np.equal(predictions["outputs"], expected)
    return np.mean(matches)
72+
73+
def process_model_output(self, outputs, **kwargs):
    """Convert the raw model output into predicted values.

    ``outputs.numpy()`` is used as-is when it is one-dimensional;
    otherwise it is reduced with an argmax along axis 1 and flattened.
    ``self._labels_shift`` is then subtracted from the result.

    Returns:
        A dict with the single key ``"outputs"`` holding the shifted array.
    """
    predictions = outputs.numpy()

    already_flat = predictions.ndim == 1
    if not already_flat:
        predictions = np.argmax(predictions, axis=1).reshape(-1)

    # NOTE(review): the shift presumably realigns label ids for models
    # with an extra class at index 0 - confirm against the dataset labels.
    shifted = predictions - self._labels_shift
    return {"outputs": shifted}
9578

9679

9780
def get_dataset(data_files, batch_size, use_synthetic_data, preprocess_method, input_size):
@@ -162,15 +145,13 @@ def preprocess_sample_fn(record):
162145
input_size=input_size
163146
)
164147

165-
dataset = dataset.apply(
166-
tf.data.experimental.map_and_batch(
167-
map_func=preprocess_fn,
168-
batch_size=batch_size,
169-
num_parallel_calls=min(8, multiprocessing.cpu_count()),
170-
drop_remainder=True
171-
)
148+
dataset = dataset.map(
149+
map_func=preprocess_fn,
150+
num_parallel_calls=min(8, multiprocessing.cpu_count())
172151
)
173152

153+
dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
154+
174155
if use_synthetic_data:
175156
dataset = dataset.take(count=1) # loop over 1 batch
176157
dataset = dataset.cache()
@@ -246,16 +227,20 @@ def _input_fn(input_files, build_steps, model_phase):
246227
output_saved_model_dir=args.output_saved_model_dir,
247228
allow_build_at_runtime=args.allow_build_at_runtime,
248229
calibration_input_fn=calibration_input_fn,
230+
debug=args.debug,
249231
gpu_mem_cap=args.gpu_mem_cap,
250232
input_signature_key=args.input_signature_key,
251233
max_workspace_size_bytes=args.max_workspace_size,
252234
minimum_segment_size=args.minimum_segment_size,
253235
num_calib_inputs=args.num_calib_inputs,
254236
optimize_offline=args.optimize_offline,
255237
optimize_offline_input_fn=optimize_offline_input_fn,
238+
output_tensor_indices=args.output_tensor_indices,
239+
output_tensor_names=args.output_tensor_names,
256240
precision_mode=args.precision,
257241
use_dynamic_shape=args.use_dynamic_shape,
258-
use_tftrt=args.use_tftrt)
242+
use_tftrt=args.use_tftrt
243+
)
259244

260245
get_benchmark_input_fn = partial(
261246
get_dataset,

tftrt/examples/image_classification/scripts/base_script.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ do
3232
MODEL_DIR="${arg#*=}"
3333
shift # Remove --input_saved_model_dir= from processing
3434
;;
35+
--output_tensor_names=*)
36+
shift # Remove --output_tensor_names= from processing
37+
;;
38+
--output_tensor_indices=*)
39+
shift # Remove --output_tensor_indices= from processing
40+
;;
3541
*)
3642
BYPASS_ARGUMENTS=" ${BYPASS_ARGUMENTS} ${arg}"
3743
;;
@@ -43,6 +49,8 @@ done
4349
INPUT_SIZE=224
4450
PREPROCESS_METHOD="vgg"
4551
NUM_CLASSES=1001
52+
OUTPUT_TENSOR_NAME_FLAG=""
53+
OUTPUT_TENSOR_IDX_FLAG=""
4654

4755
case ${MODEL_NAME} in
4856
"inception_v3" | "inception_v4")
@@ -86,6 +94,8 @@ echo ""
8694
echo "[*] INPUT_SIZE: ${INPUT_SIZE}"
8795
echo "[*] PREPROCESS_METHOD: ${PREPROCESS_METHOD}"
8896
echo "[*] NUM_CLASSES: ${NUM_CLASSES}"
97+
echo "[*] OUTPUT_TENSOR_IDX_FLAG: ${OUTPUT_TENSOR_IDX_FLAG}"
98+
echo "[*] OUTPUT_TENSOR_NAME_FLAG: ${OUTPUT_TENSOR_NAME_FLAG}"
8999
echo ""
90100
echo "[*] BYPASS_ARGUMENTS: $(echo \"${BYPASS_ARGUMENTS}\" | tr -s ' ')"
91101
echo -e "********************************************************************\n"
@@ -140,6 +150,8 @@ COMMAND="${PREPEND_COMMAND} python image_classification.py \
140150
--input_size ${INPUT_SIZE} \
141151
--preprocess_method ${PREPROCESS_METHOD} \
142152
--num_classes ${NUM_CLASSES} \
153+
${OUTPUT_TENSOR_IDX_FLAG} \
154+
${OUTPUT_TENSOR_NAME_FLAG} \
143155
${BYPASS_ARGUMENTS}"
144156

145157
COMMAND=$(echo "${COMMAND}" | tr -s " ")

0 commit comments

Comments
 (0)