This repository was archived by the owner on Feb 3, 2025. It is now read-only.

Commit 3458a5b

Cleanup
1 parent f5a72d8 commit 3458a5b

File tree: 7 files changed (+262 / -211 lines)


tftrt/examples/benchmark_args.py

Lines changed: 7 additions & 8 deletions
@@ -109,14 +109,6 @@ def __init__(self):
             "not set. Will only work with `--use_tftrt`."
         )
 
-        self._parser.add_argument(
-            "--output_tensor_indices",
-            type=str,
-            default=None,
-            help="Output tensors' index, defaults to all tensors available if "
-            "not set. Will only work without `--use_tftrt`."
-        )
-
         self._parser.add_argument(
             "--num_iterations",
             type=int,
@@ -235,6 +227,13 @@ def __init__(self):
             help="If set to True, will print additional information."
         )
 
+        self._add_bool_argument(
+            name="debug_performance",
+            default=False,
+            required=False,
+            help="If set to True, will print additional information."
+        )
+
     def _add_bool_argument(
         self, name=None, default=False, required=False, help=None
     ):
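
Note: the new `debug_performance` flag is registered through the pre-existing `_add_bool_argument` helper, whose body sits just below this hunk but outside the changed lines. Purely as an illustration of how such an argparse helper is commonly written (this is a sketch, not the repository's actual implementation):

import argparse

def add_bool_argument(parser, name, default=False, required=False, help=None):
    # Register a --<name> / --no<name> pair that sets a single boolean dest.
    group = parser.add_mutually_exclusive_group(required=required)
    group.add_argument(f"--{name}", dest=name, action="store_true", help=help)
    group.add_argument(f"--no{name}", dest=name, action="store_false")
    parser.set_defaults(**{name: default})

parser = argparse.ArgumentParser()
add_bool_argument(
    parser, "debug_performance", default=False,
    help="If set to True, will print additional information."
)
print(parser.parse_args(["--debug_performance"]).debug_performance)  # True
print(parser.parse_args([]).debug_performance)                       # False

The mutually exclusive pair keeps the CLI explicit, while `set_defaults` supplies the fallback value when neither flag is passed.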

tftrt/examples/benchmark_runner.py

Lines changed: 137 additions & 96 deletions
@@ -15,12 +15,12 @@
 from benchmark_utils import force_gpu_resync
 from benchmark_utils import print_dict
 from benchmark_utils import timed_section
+from benchmark_utils import timed_dataset
 
 import numpy as np
 import tensorflow as tf
 
 from tensorflow.python.compiler.tensorrt import trt_convert as trt
-from tensorflow.python.saved_model import tag_constants
 
 __all__ = ["BaseBenchmarkRunner"]
 
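Note: `timed_section` and the newly imported `timed_dataset` live in `benchmark_utils.py`, which is not shown in this excerpt. As a hypothetical sketch only (names, signatures, and output format are assumptions, not the repository's code), such timing helpers could look like this:

import time
from contextlib import contextmanager

@contextmanager
def timed_section(name):
    # Print a banner, run the wrapped block, then report the elapsed time.
    print(f"[START] {name}")
    start_t = time.time()
    yield
    print(f"[END] {name} - {time.time() - start_t:.1f}s")

def timed_dataset(dataset, activate=True):
    # Yield batches unchanged; when active, report time spent waiting on data.
    if not activate:
        yield from dataset
        return
    start_t = time.time()
    for batch in dataset:
        print(f"{'Data Loading Time':18s}: {time.time() - start_t:.4f}s")
        yield batch
        start_t = time.time()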
@@ -102,7 +102,7 @@ def _get_graph_func(self):
 
         if not self._args.use_tftrt:
 
-            with timed_section('Loading TensorFlow native model...'):
+            with timed_section("Loading TensorFlow native model"):
 
                 saved_model_loaded = tf.saved_model.load(
                     export_dir=self._args.input_saved_model_dir,
@@ -111,10 +111,18 @@ def _get_graph_func(self):
 
                 graph_func = saved_model_loaded.signatures[
                     self._args.input_signature_key]
+                # from tensorflow.python.framework import convert_to_constants
                 # graph_func = convert_to_constants.convert_variables_to_constants_v2(
                 #     graph_func
                 # )
 
+                # Known TF Issue: https://github.com/tensorflow/tensorflow/issues/37615#issuecomment-767804930
+                # it looks like if the original trackable object is released by
+                # the Python garbage collector once it goes out of scope, and
+                # the signature returned by the function does not maintain a
+                # back-reference to the original loaded object.
+                graph_func._backref_to_saved_model = saved_model_loaded
+
         else:
 
             def get_trt_precision(precision):
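
Note: the `_backref_to_saved_model` assignment above works around the referenced TensorFlow issue: the signature's variables can be garbage-collected if nothing keeps the loaded SavedModel object alive. A minimal standalone sketch of the same pattern (the directory and signature key are placeholders):

import tensorflow as tf

def load_signature(saved_model_dir, signature_key="serving_default"):
    # Load a SavedModel and return one signature, keeping the loaded object
    # reachable from the signature so its variables are not collected once
    # the local `loaded` name goes out of scope.
    loaded = tf.saved_model.load(saved_model_dir)
    func = loaded.signatures[signature_key]
    func._backref_to_saved_model = loaded  # see tensorflow/tensorflow#37615
    return func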
@@ -208,7 +216,7 @@ def engine_build_input_fn(num_batches, model_phase):
                 num_batches=1, model_phase="Building"
             )
 
-            with timed_section('Building TensorRT engines...'):
+            with timed_section("Building TensorRT engines"):
                 converter.build(
                     input_fn=tf.autograph.experimental.
                     do_not_convert(offline_opt_input_fn)
@@ -217,25 +225,30 @@ def engine_build_input_fn(num_batches, model_phase):
 
             if self._args.output_saved_model_dir is not None:
 
-                with timed_section('Saving converted graph with TF-TRT ...'):
+                with timed_section("Saving converted graph with TF-TRT"):
                     converter.save(self._args.output_saved_model_dir)
                     print(
                         f"Converted graph saved to "
                         f"`{self._args.output_saved_model_dir}`"
                     )
 
-        savedmodel_outputs = print_dict(
-            graph_func.structured_outputs,
-            redirect_to_str=True
-        )
+        if isinstance(graph_func.structured_outputs, (tuple, list)):
+            savedmodel_outputs = "\n - ".join([
+                str(t) for t in graph_func.structured_outputs
+            ])
+            savedmodel_outputs = f" - {savedmodel_outputs}"
+        else:
+            savedmodel_outputs = print_dict(
+                graph_func.structured_outputs, redirect_to_str=True
+            )
+        self._debug_print(f"Available Output Tensors:\n{savedmodel_outputs}")
+        print() # visual spacing
 
         chosen_outputs = "\n - ".join(
             sorted(self._args.output_tensors_name.split(","))
         )
-
-        self._debug_print(f"Available Output Tensors:\n{savedmodel_outputs}")
-        print() # visual spacing
         self._debug_print(f"Chosen Output Tensor:\n - {chosen_outputs}")
+        print() # visual spacing
 
         return graph_func
 
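Note: the added branch prints the available outputs whether `structured_outputs` is a dict or a tuple/list, which helps pick a valid value for `--output_tensors_name`. A small standalone sketch of the same inspection (the path and signature key are placeholders):

import tensorflow as tf

# List the output names exposed by a SavedModel signature.
loaded = tf.saved_model.load("my_saved_model_dir")
func = loaded.signatures["serving_default"]

outputs = func.structured_outputs
if isinstance(outputs, (tuple, list)):
    names = [str(t) for t in outputs]
else:  # usually a dict mapping output name -> Tensor
    names = sorted(outputs.keys())
print("Available Output Tensors:\n - " + "\n - ".join(names))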
@@ -244,111 +257,139 @@ def execute_benchmark(self):
         It consumes TFRecords with labels and reports accuracy.
         """
 
-        print("\nStart loading model ...")
-
-        graph_func = self._get_graph_func()
+        with timed_section("Model Loading"):
+            graph_func = self._get_graph_func()
 
-        dataset, bypass_data_to_eval = self.get_dataset_batches()
+        with timed_section("Model Inference"):
+            dataset, bypass_data_to_eval = self.get_dataset_batches()
 
-        print("\nStart inference ...")
+            if self._args.use_synthetic_data:
+                old_ds = dataset
+                try:
+                    dataset = dataset.take(count=1) # loop over 1 batch
+                    dataset = dataset.cache()
+                    dataset = dataset.repeat()
+                    dataset = dataset.prefetch(
+                        buffer_size=tf.data.experimental.AUTOTUNE
+                    )
+                    self._debug_print(
+                        "Model dataset has been replaced by a synthetic data "
+                        "loader to minimize data loading jitter."
+                    )
 
-        @force_gpu_resync
-        @tf.function(jit_compile=self._args.use_xla)
-        def infer_batch(x):
-            if isinstance(x, (tuple, list)):
-                model_out = graph_func(*x)
-            elif isinstance(x, dict):
-                model_out = graph_func(**x)
-            else:
-                model_out = graph_func(x)
+                except Exception as e:
+                    dataset = old_ds
+                    print(
+                        f"[ERROR] Impossible to transform the dataset into a "
+                        f"synthetic dataset. Performance numbers will be "
+                        f"impacted.\nError: {str(e)}."
+                    )
 
-            if self._args.output_tensors_name is not None:
-                output_tensors_name = self._args.output_tensors_name.split(",")
-                if len(output_tensors_name) == 1:
-                    return model_out[self._args.output_tensors_name]
+            @force_gpu_resync
+            @tf.function(jit_compile=self._args.use_xla)
+            def infer_batch(x):
+                if isinstance(x, (tuple, list)):
+                    model_out = graph_func(*x)
+                elif isinstance(x, dict):
+                    model_out = graph_func(**x)
                 else:
-                    return {key: model_out[key] for key in output_tensors_name}
+                    model_out = graph_func(x)
 
-            return model_out
+                if self._args.output_tensors_name is not None:
+                    output_ts_name = self._args.output_tensors_name.split(",")
+                    if len(output_ts_name) == 1:
+                        return model_out[self._args.output_tensors_name]
+                    else:
+                        return {key: model_out[key] for key in output_ts_name}
 
-        data_aggregator = DataAggregator(
-            self.postprocess_model_outputs, args=self._args
-        )
+                return model_out
 
-        iter_times = []
+            if not self._args.use_synthetic_data:
+                data_aggregator = DataAggregator(
+                    self.postprocess_model_outputs, args=self._args
+                )
 
-        def log_step(step_idx, display_every, iter_time):
-            if step_idx % display_every == 0:
-                print(" step %04d, iter_time(ms)=%.3f" % (step_idx, iter_time))
+            iter_times = []
 
-        data_start_t = time.time()
-        for step_idx, data_batch in enumerate(dataset):
-            x, y = self.preprocess_model_inputs(data_batch)
+            def log_step(step_idx, display_every, iter_time):
+                if step_idx % display_every == 0:
+                    print(
+                        f" step {step_idx:04d}, iter_time(ms)={iter_time:.3f}"
+                    )
 
-            if self._args.debug:
-                print("Step:", step_idx + 1)
-                print("Data Loading Time:", time.time() - data_start_t)
+            dataset = timed_dataset(
+                dataset, activate=self._args.debug_performance
+            )
 
-            start_time = time.time()
-            y_pred = infer_batch(x)
-            iter_times.append(time.time() - start_time)
+            for step_idx, data_batch in enumerate(dataset):
+                x, y = self.preprocess_model_inputs(data_batch)
 
-            if not self._args.debug:
-                log_step(
-                    step_idx + 1, self._args.display_every,
-                    np.mean(iter_times[-self._args.display_every:]) * 1000
-                )
-            else:
-                print("GPU Iteration Time:", iter_times[-1])
+                start_time = time.time()
+                y_pred = infer_batch(x)
+                iter_times.append(time.time() - start_time)
 
-            data_aggregator.aggregate_data(y_pred, y)
+                if not self._args.debug_performance:
+                    log_step(
+                        step_idx + 1, self._args.display_every,
+                        np.mean(iter_times[-self._args.display_every:]) * 1000
+                    )
+                else:
+                    print(f"{'GPU Iteration Time':18s}: {iter_times[-1]:.4f}s")
 
-            if (self._args.num_iterations is not None and
-                    step_idx + 1 >= self._args.num_iterations):
-                break
+                if not self._args.use_synthetic_data:
+                    data_aggregator.aggregate_data(y_pred, y)
 
-            if self._args.debug:
-                print("===============")
-                data_start_t = time.time()
+                if (self._args.num_iterations is not None and
+                        step_idx + 1 >= self._args.num_iterations):
+                    break
 
-        if step_idx % self._args.display_every != 0: # avoids double printing
-            log_step(
-                step_idx + 1,
-                display_every=1, # force print
-                iter_time=np.mean(iter_times[-self._args.display_every:]) * 1000
-            )
+            if (not self._args.debug_performance and
+                    step_idx % self._args.display_every !=
+                    0): # avoids double printing
+                log_step(
+                    step_idx + 1,
+                    display_every=1, # force print
+                    iter_time=(
+                        np.mean(iter_times[-self._args.display_every:]) * 1000
+                    )
+                )
 
-        print("\nEnd of inference. Computing metrics ...\n")
+        with timed_section("Metric Computation"):
 
-        metric, metric_units = self.evaluate_model(
-            data_aggregator.predicted_dict, data_aggregator.expected_dict,
-            bypass_data_to_eval
-        )
-        print(f"- {metric_units:35s}: {metric:.2f}")
+            if not self._args.use_synthetic_data:
+                metric, metric_units = self.evaluate_model(
+                    data_aggregator.predicted_dict,
+                    data_aggregator.expected_dict, bypass_data_to_eval
+                )
+                print(f"- {metric_units:35s}: {metric:.2f}")
 
-        metrics = dict()
+            metrics = dict()
 
-        metrics["Total Samples Processed"] = (
-            data_aggregator.total_samples_processed
-        )
+            if not self._args.use_synthetic_data:
+                metrics["Total Samples Processed"] = (
+                    data_aggregator.total_samples_processed
+                )
 
-        # Skipping last batch. Might have different batch_size
-        run_times = np.array(iter_times)[self._args.num_warmup_iterations:-1]
+            # Skipping last batch. Might have different batch_size
+            run_times = np.array(iter_times)
+            run_times = run_times[self._args.num_warmup_iterations:-1]
 
-        metrics['Total GPU Time (s)'] = int(np.ceil(np.sum(iter_times)))
-        metrics['Throughput (samples/sec)'] = np.mean(
-            self._args.batch_size / run_times
-        )
-        metrics['99th_percentile (ms)'] = np.percentile(
-            run_times, q=99, interpolation='lower'
-        ) * 1000
-        metrics['GPU Latency Mean (ms)'] = np.mean(run_times) * 1000
-        metrics['GPU Latency Median (ms)'] = np.median(run_times) * 1000
-        metrics['GPU Latency Min (ms)'] = np.min(run_times) * 1000
-        metrics['GPU Latency Max (ms)'] = np.max(run_times) * 1000
-
-        for key, val in sorted(metrics.items()):
-            if isinstance(val, int):
-                print(f"- {key:35s}: {val}")
-            else:
-                print(f"- {key:35s}: {val:.2f}")
+            metrics['Total GPU Time (s)'] = int(np.ceil(np.sum(iter_times)))
+            metrics['Throughput (samples/sec)'] = np.mean(
+                self._args.batch_size / run_times
+            )
+            metrics['99th_percentile (ms)'] = np.percentile(
+                run_times, q=99, interpolation='lower'
+            ) * 1000
+            metrics['GPU Latency Mean (ms)'] = np.mean(run_times) * 1000
+            metrics['GPU Latency Median (ms)'] = np.median(run_times) * 1000
+            metrics['GPU Latency Min (ms)'] = np.min(run_times) * 1000
+            metrics['GPU Latency Max (ms)'] = np.max(run_times) * 1000
+
+            for key, val in sorted(metrics.items()):
+                if isinstance(val, int):
+                    print(f"- {key:35s}: {val}")
+                else:
+                    print(f"- {key:35s}: {val:.2f}")
+
+            print() # visual spacing
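
Note: a worked sketch (not part of the commit) of how the reported metrics follow from the recorded per-iteration times; `batch_size`, `num_warmup_iterations`, and the timing values below are illustrative:

import numpy as np

iter_times = [0.050, 0.048] + [0.020 + 0.001 * (i % 3) for i in range(100)]
batch_size = 128
num_warmup_iterations = 5

# Skip warmup iterations and the last batch, which may be smaller.
run_times = np.array(iter_times)[num_warmup_iterations:-1]

metrics = {
    "Total GPU Time (s)": int(np.ceil(np.sum(iter_times))),
    "Throughput (samples/sec)": np.mean(batch_size / run_times),
    "99th_percentile (ms)": np.percentile(run_times, q=99, interpolation="lower") * 1000,
    "GPU Latency Mean (ms)": np.mean(run_times) * 1000,
    "GPU Latency Median (ms)": np.median(run_times) * 1000,
}
for key, val in sorted(metrics.items()):
    if isinstance(val, int):
        print(f"- {key:35s}: {val}")
    else:
        print(f"- {key:35s}: {val:.2f}")

Dropping the warmup iterations and the final batch before computing latency statistics mirrors the `run_times` slicing in the hunk above.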
