This repository was archived by the owner on Feb 3, 2025. It is now read-only.

Commit 002f035

Author: DEKHTIARJonathan (committed)
YAPF formating Applied
1 parent 23914ee commit 002f035

File tree

26 files changed: +1084 −546 lines

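For reference, a commit like this is normally produced by running YAPF over the whole tree rather than by hand-editing each file. The snippet below is a minimal sketch of how such a pass could be regenerated, assuming YAPF is installed; the style options shown are illustrative guesses, since the repository's actual YAPF configuration is not part of this commit view.

# Hedged sketch: re-applying YAPF over the same directories that
# tests/utils.py walks. The style string is an illustrative assumption;
# the repository's actual .style.yapf settings are not shown here.
import os
from glob import iglob

from yapf.yapflib.yapf_api import FormatFile

# Illustrative knobs only (dedent_closing_brackets roughly matches the ")"
# placement visible in the diff, but the real configuration may differ).
STYLE = "{based_on_style: pep8, column_limit: 79, dedent_closing_brackets: true}"

for _dir in ["tests", os.path.join("tftrt", "benchmarking-python")]:
    for _file in iglob(f"{_dir}/**/*.py", recursive=True):
        # FormatFile returns (reformatted_source_or_None, encoding, changed).
        _, _, changed = FormatFile(_file, style_config=STYLE, in_place=True)
        if changed:
            print(f"reformatted: {_file}")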

tests/utils.py

Lines changed: 1 addition & 4 deletions

@@ -27,10 +27,7 @@ def assertNotRaises(self, exc_type):
 
 
 def list_all_py_files():
-    for _dir in [
-            "tests",
-            os.path.join("tftrt", "benchmarking-python")
-    ]:
+    for _dir in ["tests", os.path.join("tftrt", "benchmarking-python")]:
         for _file in iglob(f"{_dir}/**/*.py", recursive=True):
             if any([path in _file for path in _excludes_paths]):
                 continue
Lines changed: 2 additions & 0 deletions (new file)

@@ -0,0 +1,2 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-

tftrt/benchmarking-python/benchmark_args.py

Lines changed: 1 addition & 4 deletions

@@ -343,10 +343,7 @@ def _validate_args(self, args):
                 f"({args.num_iterations} <= {args.num_warmup_iterations})"
             )
 
-        if (
-            args.tf_profile_verbose and
-            args.tf_profile_export_path is None
-        ):
+        if (args.tf_profile_verbose and args.tf_profile_export_path is None):
             raise ValueError(
                 "`--tf_profile_verbose` can only be set if "
                 "`--tf_profile_export_path=/path/to/export` is defined."

tftrt/benchmarking-python/benchmark_autotuner.py

Lines changed: 19 additions & 8 deletions

@@ -10,6 +10,7 @@
 
 
 class _TFFunctionAutoTuner(object):
+
     def __init__(self, funcs, calls_per_func, skip_n_first):
         if not isinstance(funcs, (tuple, list)):
             raise ValueError("Argument `funcs` must be a list or tuple.")
@@ -33,17 +34,23 @@ def _autotune(self, *arg, **kwargs):
             output = self._fns[fn_id](*arg, **kwargs)
             self._timings[fn_id].append(time.time() - start_t)
         except IndexError:
-            print("\n[DEBUG] AutoTuning is over... Collecting timing statistics:")
+            print(
+                "\n[DEBUG] AutoTuning is over... Collecting timing statistics:"
+            )
             perf_data = []
             for idx, fn_stat in enumerate(self._timings):
                 perf_data.append(np.mean(fn_stat[self._skip_n_first:]))
-                print(f"\t- [DEBUG] Function ID: {idx} - "
-                      f"Name: {self._fns[idx].__name__:40s} - "
-                      f"Average Exec Time: {perf_data[-1]}")
+                print(
+                    f"\t- [DEBUG] Function ID: {idx} - "
+                    f"Name: {self._fns[idx].__name__:40s} - "
+                    f"Average Exec Time: {perf_data[-1]}"
+                )
 
             best_fn_id = np.argmin(perf_data)
-            print(f"[DEBUG] Selecting function ID: {best_fn_id}. "
-                  f"Setting exec path to: `{self._fns[best_fn_id].__name__}`\n")
+            print(
+                f"[DEBUG] Selecting function ID: {best_fn_id}. "
+                f"Setting exec path to: `{self._fns[best_fn_id].__name__}`\n"
+            )
 
             self._best_fn = self._fns[best_fn_id]
             return self._best_fn(*arg, **kwargs)
@@ -58,13 +65,15 @@ def __call__(self, *arg, **kwargs):
 def _force_using_concrete_function(func):
     # `context` needs to be a closure of type list or dict for persistance
     context = []
+
     def _wrapper(*args, **kwargs):
         try:
             return context[0](*args, **kwargs)
         except IndexError:
             print(f"[INFO] Building the concrete function")
             context.append(func.get_concrete_function(*args, **kwargs))
             return context[0](*args, **kwargs)
+
     return _wrapper
 
 
@@ -98,8 +107,10 @@ def tf_concrete_function(*args, **kwargs):
 
     funcs2autotune = [eager_function, tf_function]
     if use_synthetic_data:
-        print("[INFO] Allowing direct concrete_function call with "
-              "synthetic data loader.")
+        print(
+            "[INFO] Allowing direct concrete_function call with "
+            "synthetic data loader."
+        )
         funcs2autotune.append(tf_concrete_function)
 
     return _TFFunctionAutoTuner(
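The `_force_using_concrete_function` helper reformatted above caches a traced concrete function in a closure so that later calls skip the `tf.function` dispatch overhead. A minimal, self-contained sketch of that pattern (the `add` function is a made-up example, not from the repo):

# Minimal sketch of the concrete-function caching pattern: the list
# `context` acts as a mutable closure cell holding the traced concrete
# function after the first call.
import tensorflow as tf


def _force_using_concrete_function(func):
    context = []  # persists across calls to the wrapper

    def _wrapper(*args, **kwargs):
        try:
            return context[0](*args, **kwargs)
        except IndexError:
            # First call: trace the tf.function into a concrete function.
            context.append(func.get_concrete_function(*args, **kwargs))
            return context[0](*args, **kwargs)

    return _wrapper


@tf.function
def add(x, y):  # hypothetical example function, not from the repo
    return x + y


fast_add = _force_using_concrete_function(add)
print(fast_add(tf.constant(1.0), tf.constant(2.0)))  # traces once, then reuses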

tftrt/benchmarking-python/benchmark_runner.py

Lines changed: 75 additions & 37 deletions

@@ -36,7 +36,6 @@
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import tag_constants
 
-
 __all__ = ["BaseBenchmarkRunner"]
 
 
@@ -71,7 +70,8 @@ def __init__(self, args):
 
         if args.use_xla_auto_jit:
             print("[Benchmark] - Activating XLA JIT Auto Clustering")
-            os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"
+            os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2"
+            os.environ["TF_XLA_FLAGS"] += " --tf_xla_cpu_global_jit"
 
         if args.no_tf32:
             print("[Benchmark] - Deactivating the use of TF32 format")
@@ -111,10 +111,14 @@ def _config_gpu_memory(self, gpu_mem_cap):
             else:
                 try:
                     set_virtual_device_configuration = tf.config.set_virtual_device_configuration
-                    device_config = tf.config.LogicalDeviceConfiguration(memory_limit=gpu_mem_cap)
+                    device_config = tf.config.LogicalDeviceConfiguration(
+                        memory_limit=gpu_mem_cap
+                    )
                 except AttributeError:
                     set_virtual_device_configuration = tf.config.experimental.set_virtual_device_configuration
-                    device_config = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=gpu_mem_cap)
+                    device_config = tf.config.experimental.VirtualDeviceConfiguration(
+                        memory_limit=gpu_mem_cap
+                    )
 
                 set_virtual_device_configuration(gpu, [device_config])
         except RuntimeError as e:
@@ -133,9 +137,9 @@ def _export_runtime_metrics_to_json(self, metric_dict):
             return
 
         metric_dict = {
-                # Creating a copy to avoid modifying the original
-                "results": copy.deepcopy(metric_dict),
-                "runtime_arguments": vars(self._args)
+            # Creating a copy to avoid modifying the original
+            "results": copy.deepcopy(metric_dict),
+            "runtime_arguments": vars(self._args)
         }
 
         with open(file_path, 'w') as json_f:
@@ -160,6 +164,7 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
 
         data = {f"metric_{k}": v for k, v in metric_dict.items()}
 
+        # yapf: disable
         args_to_save = [
             "batch_size",
             "input_saved_model_dir",
@@ -172,6 +177,7 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
             "use_xla",
             "use_xla_auto_jit"
         ]
+        # yapf: enable
 
         runtime_arguments = vars(self._args)
         for key in args_to_save:
@@ -181,11 +187,15 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
 
             if not os.path.isfile(file_path):
                 with open(file_path, 'w') as outcsv:
-                    writer = csv.DictWriter(outcsv, fieldnames=fieldnames, delimiter=',')
+                    writer = csv.DictWriter(
+                        outcsv, fieldnames=fieldnames, delimiter=','
+                    )
                     writer.writeheader()
 
             with open(file_path, 'a') as outcsv:
-                writer = csv.DictWriter(outcsv, fieldnames=fieldnames, delimiter=',')
+                writer = csv.DictWriter(
+                    outcsv, fieldnames=fieldnames, delimiter=','
+                )
                 writer.writerow(data)
 
         except Exception as e:
@@ -209,7 +219,9 @@ def load_model_from_disk(
     graph_func = saved_model_loaded.signatures[signature_key]
 
     if precision == "FP16":
-        tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
+        tf.config.optimizer.set_experimental_options({
+            "auto_mixed_precision": True
+        })
 
     # Known TF Issue: https://github.com/tensorflow/tensorflow/issues/37615#issuecomment-767804930
     # it looks like if the original trackable object is released by
@@ -429,7 +441,10 @@ def infer_batch(x):
         memcopy_times = []
         dequeue_times = []
 
-        def log_step(step_idx, display_every, iter_time, memcpyHtoD_time, dequeue_time):
+        def log_step(
+            step_idx, display_every, iter_time, memcpyHtoD_time,
+            dequeue_time
+        ):
             if step_idx % display_every == 0:
                 print(
                     f"step {step_idx:04d}, "
@@ -439,6 +454,7 @@ def log_step(step_idx, display_every, iter_time, memcpyHtoD_time, dequeue_time):
                 )
 
         if self._args.tf_profile_export_path:
+
             def start_profiling():
                 if self._args.tf_profile_verbose:
                     profiler_opts = tf.profiler.experimental.ProfilerOptions(
@@ -482,9 +498,9 @@ def start_profiling():
         ds_iter = iter(dataset)
 
         dequeue_batch_fn = get_dequeue_batch_fn(
-                ds_iter,
-                use_xla=self._args.use_xla,
-                use_synthetic_data=self._args.use_synthetic_data
+            ds_iter,
+            use_xla=self._args.use_xla,
+            use_synthetic_data=self._args.use_synthetic_data
         )
 
         force_data_on_gpu_fn = get_force_data_on_gpu_fn(
@@ -500,10 +516,8 @@ def start_profiling():
             if step_idx == self._args.num_warmup_iterations - 5:
                 start_profiling()
 
-            if (
-                self._args.num_iterations is not None and
-                step_idx > self._args.num_iterations
-            ):
+            if (self._args.num_iterations is not None and
+                    step_idx > self._args.num_iterations):
                 break
 
             with tracing_ctx('', step_num=step_idx, _r=1):
@@ -534,28 +548,44 @@ def start_profiling():
                 log_step(
                     step_idx,
                     display_every=self._args.display_every,
-                    iter_time=np.mean(iter_times[-self._args.display_every:]) * 1000,
-                    memcpyHtoD_time=np.mean(memcopy_times[-self._args.display_every:]) * 1000,
-                    dequeue_time=np.mean(dequeue_times[-self._args.display_every:]) * 1000
+                    iter_time=np.mean(
+                        iter_times[-self._args.display_every:]
+                    ) * 1000,
+                    memcpyHtoD_time=np.mean(
+                        memcopy_times[-self._args.display_every:]
+                    ) * 1000,
+                    dequeue_time=np.mean(
+                        dequeue_times[-self._args.display_every:]
+                    ) * 1000
                 )
             else:
-                print(f"{'GPU Iteration Time':18s}: {iter_times[-1]:08.4f}s")
-                print(f"{'Data MemCopyHtoD Time':18s}: {memcpyHtoD_time[-1]:08.4f}s")
-                print(f"{'Data Dequeue Time':18s}: {dequeue_times[-1]:08.4f}s")
+                print(
+                    f"{'GPU Iteration Time':18s}: {iter_times[-1]:08.4f}s"
+                )
+                print(
+                    f"{'Data MemCopyHtoD Time':18s}: {memcpyHtoD_time[-1]:08.4f}s"
+                )
+                print(
+                    f"{'Data Dequeue Time':18s}: {dequeue_times[-1]:08.4f}s"
+                )
 
             if not self._args.use_synthetic_data:
                 data_aggregator.aggregate_data(y_pred, y)
 
-        if (
-            not self._args.debug_performance and
-            step_idx % self._args.display_every != 0
-        ):  # avoids double printing
+        if (not self._args.debug_performance and
+                step_idx % self._args.display_every !=
+                0):  # avoids double printing
             log_step(
                 step_idx,
                 display_every=1,  # force print
-                iter_time=np.mean(iter_times[-self._args.display_every:]) * 1000,
-                memcpyHtoD_time=np.mean(memcopy_times[-self._args.display_every:]) * 1000,
-                dequeue_time=np.mean(dequeue_times[-self._args.display_every:]) * 1000
+                iter_time=np.mean(iter_times[-self._args.display_every:]) *
+                1000,
+                memcpyHtoD_time=np.mean(
+                    memcopy_times[-self._args.display_every:]
+                ) * 1000,
+                dequeue_time=np.mean(
+                    dequeue_times[-self._args.display_every:]
+                ) * 1000
            )
 
         if step_idx >= 100:
@@ -588,13 +618,17 @@ def start_profiling():
 
         metrics['Total GPU Time (s)'] = int(np.ceil(np.sum(iter_times)))
         metrics['Throughput (samples/sec)'] = (
-            self._args.batch_size / sp.stats.trim_mean(
-                iter_times, self._args.trim_mean_percentage))
+            self._args.batch_size /
+            sp.stats.trim_mean(iter_times, self._args.trim_mean_percentage)
+        )
 
         def timing_metrics(time_arr, log_prefix):
            data = dict()
-            data[f"{log_prefix} Trim Mean [{self._args.trim_mean_percentage * 100}%] (ms)"] = (
-                sp.stats.trim_mean(time_arr, self._args.trim_mean_percentage) * 1000
+            data[
+                f"{log_prefix} Trim Mean [{self._args.trim_mean_percentage * 100}%] (ms)"
+            ] = (
+                sp.stats.
+                trim_mean(time_arr, self._args.trim_mean_percentage) * 1000
            )
            data[f"{log_prefix} 99th_percentile (ms)"] = np.percentile(
                time_arr, q=99, interpolation='lower'
@@ -606,8 +640,12 @@ def timing_metrics(time_arr, log_prefix):
            return data
 
        metrics.update(timing_metrics(iter_times, "GPU Latency"))
-        metrics.update(timing_metrics(dequeue_times, "Data Batch Dequeue Time"))
-        metrics.update(timing_metrics(memcopy_times, "Data MemCopyHtoD Time"))
+        metrics.update(
+            timing_metrics(dequeue_times, "Data Batch Dequeue Time")
+        )
+        metrics.update(
+            timing_metrics(memcopy_times, "Data MemCopyHtoD Time")
+        )
 
        self._export_runtime_metrics_to_json(metrics)
        self._export_runtime_metrics_to_csv(metrics)
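Most of the benchmark_runner.py changes are pure reflow, but the metrics they touch are worth spelling out: throughput and latency are derived from a trimmed mean, which drops a fraction of the fastest and slowest iterations before averaging so outliers do not skew the result. A small sketch of that calculation with made-up timings, not real benchmark output:

# Hedged sketch of the trim-mean based metrics reflowed above.
# `iter_times`, the batch size, and the 0.1 trim percentage are
# illustrative values, not actual benchmark results.
import numpy as np
import scipy.stats as stats

batch_size = 32
trim_mean_percentage = 0.1  # drop the top and bottom 10% of samples
iter_times = np.random.uniform(0.010, 0.012, size=200)  # seconds per batch

robust_mean_s = stats.trim_mean(iter_times, trim_mean_percentage)
throughput = batch_size / robust_mean_s

print(f"GPU Latency Trim Mean [{trim_mean_percentage * 100}%] (ms): "
      f"{robust_mean_s * 1000:.3f}")
print(f"GPU Latency 99th_percentile (ms): "
      f"{np.percentile(iter_times, q=99) * 1000:.3f}")
print(f"Throughput (samples/sec): {throughput:.1f}")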

tftrt/benchmarking-python/benchmark_utils.py

Lines changed: 5 additions & 2 deletions

@@ -23,8 +23,10 @@ def wrapper(*args, **kwargs):
         return wrapper
 
     except AttributeError:
-        print("[WARNING] Using deprecated API to resync GPUs. "
-              "Non negligeable overhead might be present.")
+        print(
+            "[WARNING] Using deprecated API to resync GPUs. "
+            "Non negligeable overhead might be present."
+        )
         p = tf.constant(0.)  # Create small tensor to force GPU resync
 
         def wrapper(*args, **kwargs):
@@ -214,6 +216,7 @@ def patch_dali_dataset(dataset):
         )
 
     def take(self, limit):
+
        class _Dataset(self.__class__):

            def __init__(self, _ds, _limit):
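The benchmark_utils.py hunks only reflow a warning and add a blank line, but the surrounding fallback is the interesting part: when the preferred synchronization API is unavailable, a tiny tensor is used to make the GPU drain its queued work before timing continues. The wrapper body is not visible in this hunk, so the sketch below is an assumption about how that fallback behaves, not a copy of the repository's code:

# Hedged sketch of the deprecated-API fallback described above. The exact
# wrapper body is not shown in the diff; the device-to-host copy here is an
# assumed mechanism for forcing the GPU to finish pending work.
import time

import tensorflow as tf

p = tf.constant(0.)  # small tensor used only to trigger a sync


def force_gpu_resync(func):

    def wrapper(*args, **kwargs):
        out = func(*args, **kwargs)
        (p + 1.).numpy()  # .numpy() blocks until queued GPU work completes
        return out

    return wrapper


@force_gpu_resync
def step():  # hypothetical workload, not from the repo
    a = tf.random.uniform([1024, 1024])
    return tf.linalg.matmul(a, a)


start = time.time()
step()
print(f"synchronized step time: {time.time() - start:.4f}s")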

0 commit comments