Skip to content
This repository was archived by the owner on Feb 3, 2025. It is now read-only.

Commit 8a5ccb0

Browse files
Merge branch 'nvidia_examples'
2 parents 653ea0a + b9c1332 commit 8a5ccb0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+11478
-65
lines changed

tftrt/examples/benchmark_args.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,28 @@ def __init__(self):
133133
"least as large as the number of samples in the dataset."
134134
)
135135

136+
self._add_bool_argument(
137+
name="no_tf32",
138+
default=False,
139+
required=False,
140+
help="If set to True, the benchmark will force not using TF32."
141+
)
142+
136143
self._add_bool_argument(
137144
name="use_xla",
138145
default=False,
139146
required=False,
140147
help="If set to True, the benchmark will use XLA JIT Compilation."
141148
)
142149

150+
self._add_bool_argument(
151+
name="use_xla_auto_jit",
152+
default=False,
153+
required=False,
154+
help="If set to True, the benchmark will use XLA JIT Auto "
155+
"Clustering Compilation."
156+
)
157+
143158
self._add_bool_argument(
144159
name="use_synthetic_data",
145160
default=False,
@@ -222,6 +237,14 @@ def __init__(self):
222237

223238
# =========================== DEBUG Flags ========================== #
224239

240+
self._parser.add_argument(
241+
"--export_metrics_json_path",
242+
type=str,
243+
default=None,
244+
help="If set, the script will export runtime metrics and arguments "
245+
"to the set location in JSON format for further processing."
246+
)
247+
225248
self._add_bool_argument(
226249
name="debug",
227250
default=False,

tftrt/examples/benchmark_runner.py

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import os
66

77
import abc
8+
import copy
9+
import json
810
import logging
911
import sys
1012
import time
@@ -57,6 +59,14 @@ def evaluate_model(self, predictions, expected, bypass_data_to_eval):
5759
def __init__(self, args):
5860
self._args = args
5961

62+
if args.use_xla_auto_jit:
63+
print("[Benchmark] - Activating XLA JIT Auto Clustering")
64+
os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"
65+
66+
if args.no_tf32:
67+
print("[Benchmark] - Deactivating the use of TF32 format")
68+
os.environ["NVIDIA_TF32_OVERRIDE"] = "0"
69+
6070
logging.getLogger("tensorflow").setLevel(logging.INFO)
6171
logging.disable(logging.WARNING)
6272

@@ -96,6 +106,31 @@ def _debug_print(self, msg):
96106
if self._args.debug:
97107
print(f"[DEBUG] {msg}")
98108

109+
def _export_runtime_metrics_to_json(self, metric_dict):
    """Export the runtime metrics and script arguments to a JSON file.

    The destination is read from `self._args.export_metrics_json_path`.
    When that value is `None` the method returns immediately. The export
    is best effort: any failure is reported on stdout and swallowed so
    that it never interrupts the benchmark.

    Args:
        metric_dict: mapping of metric names to values. It is copied and
            never modified.

    Returns:
        None
    """
    json_path = self._args.export_metrics_json_path
    if json_path is None:
        # Nothing to export: skip the copy and the serialization entirely.
        return

    def _to_serializable(obj):
        # Fallback used by `json` for values it cannot encode natively.
        try:
            # Same result as the historical `obj.__dict__` fallback.
            return vars(obj)
        except TypeError:
            pass

        # Objects without a `__dict__` (e.g. NumPy scalars and arrays)
        # usually know how to convert themselves to builtin types.
        if hasattr(obj, "tolist"):
            return obj.tolist()

        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    try:
        payload = {
            # Creating a copy to avoid modifying the original
            "results": copy.deepcopy(metric_dict),
            "runtime_arguments": vars(self._args)
        }

        # Serialize BEFORE opening the file: opening in 'w' mode truncates
        # it, so a serialization error would otherwise leave an empty file
        # behind (or wipe a previous export).
        json_string = json.dumps(
            payload,
            default=_to_serializable,
            sort_keys=True,
            indent=4
        )

        with open(json_path, 'w') as json_f:
            print(json_string, file=json_f)

    except Exception as e:
        # Deliberately broad: exporting metrics must never crash the run.
        print(
            "[ERROR] Impossible to save JSON File at path: "
            f"{json_path}.\nError: {str(e)}"
        )
133+
99134
def _get_graph_func(self):
100135
"""Retrieves a frozen SavedModel and applies TF-TRT
101136
use_tftrt: bool, if true use TensorRT
@@ -381,16 +416,15 @@ def log_step(step_idx, display_every, iter_time):
381416

382417
with timed_section("Metric Computation"):
383418

419+
metrics = dict()
420+
384421
if not self._args.use_synthetic_data:
385422
metric, metric_units = self.evaluate_model(
386423
data_aggregator.predicted_dict,
387424
data_aggregator.expected_dict, bypass_data_to_eval
388425
)
389-
print(f"- {metric_units:35s}: {metric:.2f}")
390-
391-
metrics = dict()
426+
metrics["Metric"] = {metric_units: metric}
392427

393-
if not self._args.use_synthetic_data:
394428
metrics["Total Samples Processed"] = (
395429
data_aggregator.total_samples_processed
396430
)
@@ -411,10 +445,18 @@ def log_step(step_idx, display_every, iter_time):
411445
metrics['GPU Latency Min (ms)'] = np.min(run_times) * 1000
412446
metrics['GPU Latency Max (ms)'] = np.max(run_times) * 1000
413447

414-
for key, val in sorted(metrics.items()):
448+
self._export_runtime_metrics_to_json(metrics)
449+
450+
def log_value(key, val):
415451
if isinstance(val, int):
416452
print(f"- {key:35s}: {val}")
417453
else:
418454
print(f"- {key:35s}: {val:.2f}")
419455

456+
for key, val in sorted(metrics.items()):
457+
if isinstance(val, dict):
458+
log_value(*list(val.items())[0])
459+
else:
460+
log_value(key, val)
461+
420462
print() # visual spacing

tftrt/examples/benchmark_utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,35 @@ def aggregate_data(self, y_pred, y):
190190
self._predicted[key][idx_start:idx_stop] = y_pred[key]
191191
for key, val in self._expected.items():
192192
self._expected[key][idx_start:idx_stop] = y[key]
193+
194+
195+
def patch_dali_dataset(dataset):
    """Attach a `take(limit)` method to the class of a DALI dataset.

    Args:
        dataset: an instance of `nvidia.dali.plugin.tf.DALIDataset`.

    Returns:
        The same `dataset` object, whose class now exposes `take`.

    Raises:
        TypeError: if `dataset` is not a `DALIDataset` instance.
    """
    # Imported lazily so that DALI is only required when this is called.
    import nvidia.dali.plugin.tf as dali_tf

    if not isinstance(dataset, dali_tf.DALIDataset):
        raise TypeError(
            "Dataset supplied should be an instance of `DALIDataset`. "
            f"Received: `{type(dataset)}`"
        )

    def take(self, limit):
        """Return an iterable yielding at most `limit` elements of `self`."""

        class _Dataset(self.__class__):
            # Subclasses the patched dataset's class so `isinstance` checks
            # against it still pass. The parent `__init__` is not called:
            # the wrapper only stores a reference to the dataset and the
            # limit.

            def __init__(self, _ds, _limit):
                self._ds = _ds
                self._limit = _limit

            def __iter__(self):
                if self._limit <= 0:
                    return

                for count, data in enumerate(self._ds, start=1):
                    yield data
                    # Stop right after the last wanted element instead of
                    # pulling one more from the wrapped dataset first.
                    if count >= self._limit:
                        break

        return _Dataset(self, limit)

    # Monkey Patch: assigned on the class, so every instance of that class
    # gains `take`, not only `dataset`.
    dataset.__class__.take = take

    return dataset

tftrt/examples/image_classification/base_run_inference.sh

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ DATA_DIR=""
88
MODEL_DIR=""
99

1010
# Default Argument Values
11-
NVIDIA_TF32_OVERRIDE=""
12-
1311
BYPASS_ARGUMENTS=""
14-
TF_AUTO_JIT_XLA_FLAG=""
1512

1613
# Loop through arguments and process them
1714
for arg in "$@"
@@ -21,10 +18,6 @@ do
2118
MODEL_NAME="${arg#*=}"
2219
shift # Remove --model_name from processing
2320
;;
24-
--no_tf32)
25-
NVIDIA_TF32_OVERRIDE="NVIDIA_TF32_OVERRIDE=0"
26-
shift # Remove --no_tf32 from processing
27-
;;
2821
--data_dir=*)
2922
DATA_DIR="${arg#*=}"
3023
shift # Remove --data_dir= from processing
@@ -39,10 +32,6 @@ do
3932
--output_tensors_name=*)
4033
shift # Remove --output_tensors_name= from processing
4134
;;
42-
--use_xla_auto_jit)
43-
TF_AUTO_JIT_XLA_FLAG="TF_XLA_FLAGS=\"--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit\""
44-
shift # Remove --use_xla_auto_jit from processing
45-
;;
4635
*)
4736
BYPASS_ARGUMENTS=" ${BYPASS_ARGUMENTS} ${arg}"
4837
;;
@@ -105,16 +94,13 @@ echo ""
10594
echo "[*] DATA_DIR: ${DATA_DIR}"
10695
echo "[*] MODEL_DIR: ${MODEL_DIR}"
10796
echo ""
108-
echo "[*] NVIDIA_TF32_OVERRIDE: ${NVIDIA_TF32_OVERRIDE}"
109-
echo ""
11097
# Custom Image Classification Task Flags
11198
echo "[*] INPUT_SIZE: ${INPUT_SIZE}"
11299
echo "[*] PREPROCESS_METHOD: ${PREPROCESS_METHOD}"
113100
echo "[*] NUM_CLASSES: ${NUM_CLASSES}"
114101
echo "[*] MAX_SAMPLES: ${MAX_SAMPLES}"
115102
echo "[*] OUTPUT_TENSORS_NAME: ${OUTPUT_TENSORS_NAME}"
116103
echo ""
117-
echo "[*] TF_AUTO_JIT_XLA_FLAG: ${TF_AUTO_JIT_XLA_FLAG}"
118104
echo "[*] BYPASS_ARGUMENTS: $(echo \"${BYPASS_ARGUMENTS}\" | tr -s ' ')"
119105
echo -e "********************************************************************\n"
120106

@@ -157,10 +143,7 @@ BENCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
157143
cd ${BENCH_DIR}
158144

159145
# Execute the example
160-
161-
PREPEND_COMMAND="${TF_AUTO_JIT_XLA_FLAG} ${NVIDIA_TF32_OVERRIDE}"
162-
163-
COMMAND="${PREPEND_COMMAND} python image_classification.py \
146+
COMMAND="python image_classification.py \
164147
--data_dir ${DATA_DIR} \
165148
--calib_data_dir ${DATA_DIR} \
166149
--input_saved_model_dir ${INPUT_SAVED_MODEL_DIR} \

tftrt/examples/image_classification/image_classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def evaluate_model(self, predictions, expected, bypass_data_to_eval):
219219
This computes overall accuracy, mAP, etc. Returns the
220220
metric value and a metric_units string naming the metric.
221221
222-
Note: script arguments can be accessed using `args.attr`
222+
Note: script arguments can be accessed using `self._args.attr`
223223
"""
224224

225225
return (
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
2+
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
3+
4+
signature_def['__saved_model_init_op']:
5+
The given SavedModel SignatureDef contains the following input(s):
6+
The given SavedModel SignatureDef contains the following output(s):
7+
outputs['__saved_model_init_op'] tensor_info:
8+
dtype: DT_INVALID
9+
shape: unknown_rank
10+
name: NoOp
11+
Method name is:
12+
13+
signature_def['serving_default']:
14+
The given SavedModel SignatureDef contains the following input(s):
15+
inputs['input_mask'] tensor_info:
16+
dtype: DT_INT32
17+
shape: (-1, 384)
18+
name: serving_default_input_mask:0
19+
inputs['input_type_ids'] tensor_info:
20+
dtype: DT_INT32
21+
shape: (-1, 384)
22+
name: serving_default_input_type_ids:0
23+
inputs['input_word_ids'] tensor_info:
24+
dtype: DT_INT32
25+
shape: (-1, 384)
26+
name: serving_default_input_word_ids:0
27+
The given SavedModel SignatureDef contains the following output(s):
28+
outputs['end_positions'] tensor_info:
29+
dtype: DT_FLOAT
30+
shape: (-1, 384)
31+
name: StatefulPartitionedCall:0
32+
outputs['start_positions'] tensor_info:
33+
dtype: DT_FLOAT
34+
shape: (-1, 384)
35+
name: StatefulPartitionedCall:1
36+
Method name is: tensorflow/serving/predict
37+
38+
Defined Functions:
39+
Function Name: '__call__'
40+
Option #1
41+
Callable with:
42+
Argument #1
43+
DType: list
44+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
45+
Argument #2
46+
DType: bool
47+
Value: True
48+
Argument #3
49+
DType: NoneType
50+
Value: None
51+
Option #2
52+
Callable with:
53+
Argument #1
54+
DType: list
55+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
56+
Argument #2
57+
DType: bool
58+
Value: True
59+
Argument #3
60+
DType: NoneType
61+
Value: None
62+
Option #3
63+
Callable with:
64+
Argument #1
65+
DType: list
66+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
67+
Argument #2
68+
DType: bool
69+
Value: False
70+
Argument #3
71+
DType: NoneType
72+
Value: None
73+
Option #4
74+
Callable with:
75+
Argument #1
76+
DType: list
77+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
78+
Argument #2
79+
DType: bool
80+
Value: False
81+
Argument #3
82+
DType: NoneType
83+
Value: None
84+
85+
Function Name: '_default_save_signature'
86+
Option #1
87+
Callable with:
88+
Argument #1
89+
DType: list
90+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
91+
92+
Function Name: 'call_and_return_all_conditional_losses'
93+
Option #1
94+
Callable with:
95+
Argument #1
96+
DType: list
97+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
98+
Argument #2
99+
DType: bool
100+
Value: True
101+
Argument #3
102+
DType: NoneType
103+
Value: None
104+
Option #2
105+
Callable with:
106+
Argument #1
107+
DType: list
108+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
109+
Argument #2
110+
DType: bool
111+
Value: False
112+
Argument #3
113+
DType: NoneType
114+
Value: None
115+
Option #3
116+
Callable with:
117+
Argument #1
118+
DType: list
119+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
120+
Argument #2
121+
DType: bool
122+
Value: True
123+
Argument #3
124+
DType: NoneType
125+
Value: None
126+
Option #4
127+
Callable with:
128+
Argument #1
129+
DType: list
130+
Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
131+
Argument #2
132+
DType: bool
133+
Value: False
134+
Argument #3
135+
DType: NoneType
136+
Value: None

0 commit comments

Comments
 (0)