
Commit 104df40

Author: DEKHTIARJonathan
Commit message: [Benchmarking] Improving autotuning decorator logging
Parent: 903e059

File tree: 2 files changed (+31, -17 lines)

tftrt/benchmarking-python/benchmark_autotuner.py
Lines changed: 22 additions & 15 deletions
@@ -3,6 +3,7 @@
 # -*- coding: utf-8 -*-

 import time
+
 import numpy as np
 import tensorflow as tf

@@ -86,31 +87,37 @@ def auto_tf_func_tuner(

     def wrapper(func):

-        @force_gpu_resync
-        def eager_function(*args, **kwargs):
-            return func(*args, **kwargs)
+        func_name = func.__name__
+
+        eager_function = func

-        @force_gpu_resync
-        @tf.function(jit_compile=use_xla)
-        def tf_function(*args, **kwargs):
-            return func(*args, **kwargs)
+        tf_function = tf.function(jit_compile=use_xla)(func)

-        @force_gpu_resync
-        @_force_using_concrete_function
-        @tf.function(jit_compile=use_xla)
-        def tf_concrete_function(*args, **kwargs):
-            return func(*args, **kwargs)
+        def resync_gpu_wrap_fn(_func, str_appended):
+            name = f"{func_name}_{str_appended}"
+            _func.__name__ = name
+            _func = force_gpu_resync(_func)
+            _func.__name__ = name
+            return _func

-        eager_function.__name__ = f"{func.__name__}_eager"
-        tf_function.__name__ = f"{func.__name__}_tf_function"
-        tf_concrete_function.__name__ = f"{func.__name__}_tf_concrete_function"
+        eager_function = resync_gpu_wrap_fn(eager_function, "eager")
+        tf_function = resync_gpu_wrap_fn(tf_function, "tf_function")

         funcs2autotune = [eager_function, tf_function]
+
         if use_synthetic_data:
             print(
                 "[INFO] Allowing direct concrete_function call with "
                 "synthetic data loader."
             )
+
+            tf_concrete_function = _force_using_concrete_function(
+                tf.function(jit_compile=use_xla)(func)
+            )
+            tf_concrete_function = resync_gpu_wrap_fn(
+                tf_concrete_function, "tf_concrete_function"
+            )
+
             funcs2autotune.append(tf_concrete_function)

         return _TFFunctionAutoTuner(
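
Side note on the double __name__ assignment inside resync_gpu_wrap_fn: the name is set once before force_gpu_resync runs, so the decorator's log lines report the suffixed name, and set again afterwards, because the decorator returns a brand-new wrapper function carrying its own __name__. A minimal, self-contained sketch of that pattern (logging_decorator, wrap_with_name, and my_benchmark are hypothetical names for illustration, not part of this repository):

# Hypothetical stand-in for force_gpu_resync: logs the name it sees,
# then returns a fresh wrapper whose __name__ would default to "wrapper".
def logging_decorator(func):
    print(f"[INFO] Decorating: {func.__name__}")

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    return wrapper


# Same shape as resync_gpu_wrap_fn in the diff above.
def wrap_with_name(func, suffix):
    name = f"{func.__name__}_{suffix}"
    func.__name__ = name            # visible inside logging_decorator
    func = logging_decorator(func)  # returns a new function object
    func.__name__ = name            # restore the name on the new wrapper
    return func


def my_benchmark():
    pass


wrapped = wrap_with_name(my_benchmark, "eager")
print(wrapped.__name__)  # -> my_benchmark_eager

Without the second assignment, wrapped.__name__ would report as "wrapper", which is the kind of uninformative log output this commit is cleaning up.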

tftrt/benchmarking-python/benchmark_utils.py
Lines changed: 9 additions & 2 deletions
@@ -11,9 +11,14 @@


 def force_gpu_resync(func):
+
+    func_name = func.__name__
     try:
         sync_device_fn = tf.experimental.sync_devices
-        print("[INFO] Using API `tf.experimental.sync_devices` to resync GPUs.")
+        print(
+            "[INFO] Using API `tf.experimental.sync_devices` to resync GPUs "
+            f"on function: {func_name}."
+        )

         def wrapper(*args, **kwargs):
             rslt = func(*args, **kwargs)
@@ -25,8 +30,10 @@ def wrapper(*args, **kwargs):
     except AttributeError:
         print(
             "[WARNING] Using deprecated API to resync GPUs. "
-            "Non negligeable overhead might be present."
+            "Non negligeable overhead might be present on function: "
+            f"{func_name}."
         )
+
         p = tf.constant(0.)  # Create small tensor to force GPU resync

         def wrapper(*args, **kwargs):
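
For context, force_gpu_resync feature-detects tf.experimental.sync_devices (available in newer TensorFlow releases) and falls back to a small-tensor trick on older ones, where accessing the attribute raises AttributeError. A hedged sketch of the complete decorator under that reading; the two wrapper bodies beyond what the hunks show (the sync_device_fn() call and the host readback) are reconstructions, not verbatim repository code:

import tensorflow as tf


def force_gpu_resync(func):
    func_name = func.__name__
    try:
        # Preferred path: explicit device synchronization in newer TF.
        sync_device_fn = tf.experimental.sync_devices
        print(
            "[INFO] Using API `tf.experimental.sync_devices` to resync GPUs "
            f"on function: {func_name}."
        )

        def wrapper(*args, **kwargs):
            rslt = func(*args, **kwargs)
            sync_device_fn()  # block until pending GPU work has finished
            return rslt

    except AttributeError:
        # Fallback path for older TF: force a sync by reading a tiny
        # tensor back to the host (assumed mechanism, not shown in the diff).
        print(
            "[WARNING] Using deprecated API to resync GPUs. "
            "Non negligeable overhead might be present on function: "
            f"{func_name}."
        )
        p = tf.constant(0.)  # Create small tensor to force GPU resync

        def wrapper(*args, **kwargs):
            rslt = func(*args, **kwargs)
            (p + 1.).numpy()  # host readback flushes queued device work
            return rslt

    return wrapper

Capturing func_name once at decoration time is what lets both print statements, and both wrappers, report the suffixed name assigned by resync_gpu_wrap_fn in benchmark_autotuner.py.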
