tensorflow
diff --git a/tests/utils.py
Lines changed: 1 addition & 4 deletions b/tests/utils.py
Lines changed: 1 addition & 4 deletions
diff --git a/tftrt/benchmarking-python/__init__.py
Lines changed: 2 additions & 0 deletions b/tftrt/benchmarking-python/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/tftrt/benchmarking-python/benchmark_args.py
Lines changed: 1 addition & 4 deletions b/tftrt/benchmarking-python/benchmark_args.py
Lines changed: 1 addition & 4 deletions
diff --git a/tftrt/benchmarking-python/benchmark_autotuner.py
Lines changed: 19 additions & 8 deletions b/tftrt/benchmarking-python/benchmark_autotuner.py
Lines changed: 19 additions & 8 deletions
diff --git a/tftrt/benchmarking-python/benchmark_runner.py
Lines changed: 75 additions & 37 deletions b/tftrt/benchmarking-python/benchmark_runner.py
Lines changed: 75 additions & 37 deletions
diff --git a/tftrt/benchmarking-python/benchmark_utils.py
Lines changed: 5 additions & 2 deletions b/tftrt/benchmarking-python/benchmark_utils.py
Lines changed: 5 additions & 2 deletions
@@ -27,10 +27,7 @@ def assertNotRaises(self, exc_type):
 
 
 def list_all_py_files():
-    for _dir in [
-        "tests",
-        os.path.join("tftrt", "benchmarking-python")
-    ]:
+    for _dir in ["tests", os.path.join("tftrt", "benchmarking-python")]:
         for _file in iglob(f"{_dir}/**/*.py", recursive=True):
             if any([path in _file for path in _excludes_paths]):
                 continue
 
@@ -0,0 +1,2 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
@@ -343,10 +343,7 @@ def _validate_args(self, args):
                 f"({args.num_iterations} <= {args.num_warmup_iterations})"
             )
 
-        if (
-            args.tf_profile_verbose and
-            args.tf_profile_export_path is None
-        ):
+        if (args.tf_profile_verbose and args.tf_profile_export_path is None):
             raise ValueError(
                 "`--tf_profile_verbose` can only be set if "
                 "`--tf_profile_export_path=/path/to/export` is defined."
 
@@ -10,6 +10,7 @@
 
 
 class _TFFunctionAutoTuner(object):
+
     def __init__(self, funcs, calls_per_func, skip_n_first):
         if not isinstance(funcs, (tuple, list)):
             raise ValueError("Argument `funcs` must be a list or tuple.")
@@ -33,17 +34,23 @@ def _autotune(self, *arg, **kwargs):
             output = self._fns[fn_id](*arg, **kwargs)
             self._timings[fn_id].append(time.time() - start_t)
         except IndexError:
-            print("\n[DEBUG] AutoTuning is over... Collecting timing statistics:")
+            print(
+                "\n[DEBUG] AutoTuning is over... Collecting timing statistics:"
+            )
             perf_data = []
             for idx, fn_stat in enumerate(self._timings):
                 perf_data.append(np.mean(fn_stat[self._skip_n_first:]))
-                print(f"\t- [DEBUG] Function ID: {idx} - "
-                      f"Name: {self._fns[idx].__name__:40s} - "
-                      f"Average Exec Time: {perf_data[-1]}")
+                print(
+                    f"\t- [DEBUG] Function ID: {idx} - "
+                    f"Name: {self._fns[idx].__name__:40s} - "
+                    f"Average Exec Time: {perf_data[-1]}"
+                )
 
             best_fn_id = np.argmin(perf_data)
-            print(f"[DEBUG] Selecting function ID: {best_fn_id}. "
-                  f"Setting exec path to: `{self._fns[best_fn_id].__name__}`\n")
+            print(
+                f"[DEBUG] Selecting function ID: {best_fn_id}. "
+                f"Setting exec path to: `{self._fns[best_fn_id].__name__}`\n"
+            )
 
             self._best_fn = self._fns[best_fn_id]
             return self._best_fn(*arg, **kwargs)
@@ -58,13 +65,15 @@ def __call__(self, *arg, **kwargs):
 def _force_using_concrete_function(func):
     # `context` must be a mutable container (list or dict) captured by the closure so the value persists across calls
     context = []
+
     def _wrapper(*args, **kwargs):
         try:
             return context[0](*args, **kwargs)
         except IndexError:
             print(f"[INFO] Building the concrete function")
             context.append(func.get_concrete_function(*args, **kwargs))
             return context[0](*args, **kwargs)
+
     return _wrapper
 
 
@@ -98,8 +107,10 @@ def tf_concrete_function(*args, **kwargs):
 
         funcs2autotune = [eager_function, tf_function]
         if use_synthetic_data:
-            print("[INFO] Allowing direct concrete_function call with "
-                  "synthetic data loader.")
+            print(
+                "[INFO] Allowing direct concrete_function call with "
+                "synthetic data loader."
+            )
             funcs2autotune.append(tf_concrete_function)
 
         return _TFFunctionAutoTuner(
 
@@ -36,7 +36,6 @@
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import tag_constants
 
-
 __all__ = ["BaseBenchmarkRunner"]
 
 
@@ -71,7 +70,8 @@ def __init__(self, args):
 
         if args.use_xla_auto_jit:
             print("[Benchmark] - Activating XLA JIT Auto Clustering")
-            os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"
+            os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2"
+            os.environ["TF_XLA_FLAGS"] += " --tf_xla_cpu_global_jit"
 
         if args.no_tf32:
             print("[Benchmark] - Deactivating the use of TF32 format")
@@ -111,10 +111,14 @@ def _config_gpu_memory(self, gpu_mem_cap):
                 else:
                     try:
                         set_virtual_device_configuration = tf.config.set_virtual_device_configuration
-                        device_config = tf.config.LogicalDeviceConfiguration(memory_limit=gpu_mem_cap)
+                        device_config = tf.config.LogicalDeviceConfiguration(
+                            memory_limit=gpu_mem_cap
+                        )
                     except AttributeError:
                         set_virtual_device_configuration = tf.config.experimental.set_virtual_device_configuration
-                        device_config = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=gpu_mem_cap)
+                        device_config = tf.config.experimental.VirtualDeviceConfiguration(
+                            memory_limit=gpu_mem_cap
+                        )
 
                     set_virtual_device_configuration(gpu, [device_config])
             except RuntimeError as e:
@@ -133,9 +137,9 @@ def _export_runtime_metrics_to_json(self, metric_dict):
                 return
 
             metric_dict = {
-              # Creating a copy to avoid modifying the original
-              "results": copy.deepcopy(metric_dict),
-              "runtime_arguments": vars(self._args)
+                # Creating a copy to avoid modifying the original
+                "results": copy.deepcopy(metric_dict),
+                "runtime_arguments": vars(self._args)
             }
 
             with open(file_path, 'w') as json_f:
@@ -160,6 +164,7 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
 
             data = {f"metric_{k}": v for k, v in metric_dict.items()}
 
+            # yapf: disable
             args_to_save = [
                 "batch_size",
                 "input_saved_model_dir",
@@ -172,6 +177,7 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
                 "use_xla",
                 "use_xla_auto_jit"
             ]
+            # yapf: enable
 
             runtime_arguments = vars(self._args)
             for key in args_to_save:
@@ -181,11 +187,15 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
 
             if not os.path.isfile(file_path):
                 with open(file_path, 'w') as outcsv:
-                    writer = csv.DictWriter(outcsv, fieldnames=fieldnames, delimiter=',')
+                    writer = csv.DictWriter(
+                        outcsv, fieldnames=fieldnames, delimiter=','
+                    )
                     writer.writeheader()
 
             with open(file_path, 'a') as outcsv:
-                writer = csv.DictWriter(outcsv, fieldnames=fieldnames, delimiter=',')
+                writer = csv.DictWriter(
+                    outcsv, fieldnames=fieldnames, delimiter=','
+                )
                 writer.writerow(data)
 
         except Exception as e:
@@ -209,7 +219,9 @@ def load_model_from_disk(
             graph_func = saved_model_loaded.signatures[signature_key]
 
             if precision == "FP16":
-                tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
+                tf.config.optimizer.set_experimental_options({
+                    "auto_mixed_precision": True
+                })
 
             # Known TF Issue: https://github.com/tensorflow/tensorflow/issues/37615#issuecomment-767804930
             # it looks like if the original trackable object is released by
@@ -429,7 +441,10 @@ def infer_batch(x):
             memcopy_times = []
             dequeue_times = []
 
-            def log_step(step_idx, display_every, iter_time, memcpyHtoD_time, dequeue_time):
+            def log_step(
+                step_idx, display_every, iter_time, memcpyHtoD_time,
+                dequeue_time
+            ):
                 if step_idx % display_every == 0:
                     print(
                         f"step {step_idx:04d}, "
@@ -439,6 +454,7 @@ def log_step(step_idx, display_every, iter_time, memcpyHtoD_time, dequeue_time):
                     )
 
             if self._args.tf_profile_export_path:
+
                 def start_profiling():
                     if self._args.tf_profile_verbose:
                         profiler_opts = tf.profiler.experimental.ProfilerOptions(
@@ -482,9 +498,9 @@ def start_profiling():
             ds_iter = iter(dataset)
 
             dequeue_batch_fn = get_dequeue_batch_fn(
-              ds_iter,
-              use_xla=self._args.use_xla,
-              use_synthetic_data=self._args.use_synthetic_data
+                ds_iter,
+                use_xla=self._args.use_xla,
+                use_synthetic_data=self._args.use_synthetic_data
             )
 
             force_data_on_gpu_fn = get_force_data_on_gpu_fn(
@@ -500,10 +516,8 @@ def start_profiling():
                 if step_idx == self._args.num_warmup_iterations - 5:
                     start_profiling()
 
-                if (
-                    self._args.num_iterations is not None and
-                    step_idx > self._args.num_iterations
-                ):
+                if (self._args.num_iterations is not None and
+                        step_idx > self._args.num_iterations):
                     break
 
                 with tracing_ctx('', step_num=step_idx, _r=1):
@@ -534,28 +548,44 @@ def start_profiling():
                     log_step(
                         step_idx,
                         display_every=self._args.display_every,
-                        iter_time=np.mean(iter_times[-self._args.display_every:]) * 1000,
-                        memcpyHtoD_time=np.mean(memcopy_times[-self._args.display_every:]) * 1000,
-                        dequeue_time=np.mean(dequeue_times[-self._args.display_every:]) * 1000
+                        iter_time=np.mean(
+                            iter_times[-self._args.display_every:]
+                        ) * 1000,
+                        memcpyHtoD_time=np.mean(
+                            memcopy_times[-self._args.display_every:]
+                        ) * 1000,
+                        dequeue_time=np.mean(
+                            dequeue_times[-self._args.display_every:]
+                        ) * 1000
                     )
                 else:
-                    print(f"{'GPU Iteration Time':18s}: {iter_times[-1]:08.4f}s")
-                    print(f"{'Data MemCopyHtoD Time':18s}: {memcpyHtoD_time[-1]:08.4f}s")
-                    print(f"{'Data Dequeue Time':18s}: {dequeue_times[-1]:08.4f}s")
+                    print(
+                        f"{'GPU Iteration Time':18s}: {iter_times[-1]:08.4f}s"
+                    )
+                    print(
+                        f"{'Data MemCopyHtoD Time':18s}: {memcpyHtoD_time[-1]:08.4f}s"
+                    )
+                    print(
+                        f"{'Data Dequeue Time':18s}: {dequeue_times[-1]:08.4f}s"
+                    )
 
                 if not self._args.use_synthetic_data:
                     data_aggregator.aggregate_data(y_pred, y)
 
-            if (
-                not self._args.debug_performance and
-                step_idx % self._args.display_every != 0
-            ):  # avoids double printing
+            if (not self._args.debug_performance and
+                    step_idx % self._args.display_every !=
+                    0):  # avoids double printing
                 log_step(
                     step_idx,
                     display_every=1,  # force print
-                    iter_time=np.mean(iter_times[-self._args.display_every:]) * 1000,
-                    memcpyHtoD_time=np.mean(memcopy_times[-self._args.display_every:]) * 1000,
-                    dequeue_time=np.mean(dequeue_times[-self._args.display_every:]) * 1000
+                    iter_time=np.mean(iter_times[-self._args.display_every:]) *
+                    1000,
+                    memcpyHtoD_time=np.mean(
+                        memcopy_times[-self._args.display_every:]
+                    ) * 1000,
+                    dequeue_time=np.mean(
+                        dequeue_times[-self._args.display_every:]
+                    ) * 1000
                 )
 
             if step_idx >= 100:
@@ -588,13 +618,17 @@ def start_profiling():
 
             metrics['Total GPU Time (s)'] = int(np.ceil(np.sum(iter_times)))
             metrics['Throughput (samples/sec)'] = (
-                self._args.batch_size / sp.stats.trim_mean(
-                    iter_times, self._args.trim_mean_percentage))
+                self._args.batch_size /
+                sp.stats.trim_mean(iter_times, self._args.trim_mean_percentage)
+            )
 
             def timing_metrics(time_arr, log_prefix):
                 data = dict()
-                data[f"{log_prefix} Trim Mean [{self._args.trim_mean_percentage * 100}%] (ms)"] = (
-                    sp.stats.trim_mean(time_arr, self._args.trim_mean_percentage) * 1000
+                data[
+                    f"{log_prefix} Trim Mean [{self._args.trim_mean_percentage * 100}%] (ms)"
+                ] = (
+                    sp.stats.
+                    trim_mean(time_arr, self._args.trim_mean_percentage) * 1000
                 )
                 data[f"{log_prefix} 99th_percentile (ms)"] = np.percentile(
                     time_arr, q=99, interpolation='lower'
@@ -606,8 +640,12 @@ def timing_metrics(time_arr, log_prefix):
                 return data
 
             metrics.update(timing_metrics(iter_times, "GPU Latency"))
-            metrics.update(timing_metrics(dequeue_times, "Data Batch Dequeue Time"))
-            metrics.update(timing_metrics(memcopy_times, "Data MemCopyHtoD Time"))
+            metrics.update(
+                timing_metrics(dequeue_times, "Data Batch Dequeue Time")
+            )
+            metrics.update(
+                timing_metrics(memcopy_times, "Data MemCopyHtoD Time")
+            )
 
             self._export_runtime_metrics_to_json(metrics)
             self._export_runtime_metrics_to_csv(metrics)
 
@@ -23,8 +23,10 @@ def wrapper(*args, **kwargs):
         return wrapper
 
     except AttributeError:
-        print("[WARNING] Using deprecated API to resync GPUs. "
-              "Non negligeable overhead might be present.")
+        print(
+            "[WARNING] Using deprecated API to resync GPUs. "
+            "Non negligeable overhead might be present."
+        )
         p = tf.constant(0.)  # Create small tensor to force GPU resync
 
         def wrapper(*args, **kwargs):
@@ -214,6 +216,7 @@ def patch_dali_dataset(dataset):
         )
 
     def take(self, limit):
+
         class _Dataset(self.__class__):
 
             def __init__(self, _ds, _limit):
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#!/usr/bin/env python`
	`2`	`+# -*- coding: utf-8 -*-`