[Benchmark-Py] Release 1.0.1 - Remove autotuning on get_dequeue_batch_fn in order to fix DALIDataset patch

DEKHTIARJonathan · DEKHTIARJonathan · commit 903e0595afff · 2022-07-25T12:17:26.000-07:00
diff --git a/tftrt/benchmarking-python/CHANGELOG.md b/tftrt/benchmarking-python/CHANGELOG.md
@@ -37,14 +37,23 @@ Description of the change
                   existing metrics or models scripts in a way that would make
                   metrics not comparable between minor releases.
 
- - **Patch Version:** Changes that are expected to have no change to the operation
-                  of the benchmark nor the way metrics are calculated.
-                  Basically these changes are transparent for the user.
+ - **Patch Version:** Changes that are expected to affect neither the
+                   operation of the benchmark nor the way metrics are
+                   calculated. Basically, these changes are transparent for
+                   the user.
 
 # Versions
 
 <!-- YOU CAN EDIT FROM HERE -->
 
+## [1.0.1] - 2022.07.25 - @DEKHTIARJonathan
+
+Removing AutoTuning on `get_dequeue_batch_fn` because DALIDataset was not
+respecting the limit on the number of batches.
+
+It should not impact the benchmark results: most of the time, the autotuner was
+selecting the eager version anyway.
+
 ## [1.0.0] - 2022.07.20 - @DEKHTIARJonathan
 
 Initial Versioning Release.
diff --git a/tftrt/benchmarking-python/benchmark_runner.py b/tftrt/benchmarking-python/benchmark_runner.py
@@ -40,7 +40,7 @@
 # The `__version__` number shall be updated everytime core benchmarking files
 # are updated.
 # Please update CHANGELOG.md with a description of what this version changed.
-__version__ = "1.0.0"
+__version__ = "1.0.1"
 
 __all__ = ["__version__", "BaseBenchmarkRunner"]
 
diff --git a/tftrt/benchmarking-python/benchmark_utils.py b/tftrt/benchmarking-python/benchmark_utils.py
@@ -204,36 +204,3 @@ def aggregate_data(self, y_pred, y):
                             f"Expected: {y[key].shape}"
                         )
                     self._expected[key][idx_start:idx_stop] = y[key]
-
-
-def patch_dali_dataset(dataset):
-    import nvidia.dali.plugin.tf as dali_tf
-
-    if not isinstance(dataset, dali_tf.DALIDataset):
-        raise TypeError(
-            "Dataset supplied should be an instance of `DALIDataset`."
-            f"Received: `{type(dataset)}`"
-        )
-
-    def take(self, limit):
-
-        class _Dataset(self.__class__):
-
-            def __init__(self, _ds, _limit):
-                self._ds = _ds
-                self._limit = _limit
-
-            def __iter__(self):
-                idx = 0
-                for data in self._ds:
-                    if idx >= self._limit:
-                        break
-                    yield data
-                    idx += 1
-
-        return _Dataset(self, limit)
-
-    # Monkey Patch
-    dataset.__class__.take = take
-
-    return dataset
diff --git a/tftrt/benchmarking-python/dataloading_utils.py b/tftrt/benchmarking-python/dataloading_utils.py
@@ -7,6 +7,7 @@
 import tensorflow as tf
 
 from benchmark_autotuner import auto_tf_func_tuner
+from benchmark_utils import force_gpu_resync
 
 
 def SyntheticDataset(dataset, device):
@@ -69,7 +70,7 @@ def ensure_dataset_on_gpu(dataset, device):
 
 def get_dequeue_batch_fn(ds_iter, use_xla=False, use_synthetic_data=False):
 
-    @auto_tf_func_tuner(use_xla=use_xla, use_synthetic_data=use_synthetic_data)
+    @force_gpu_resync
     def dequeue_batch_fn():
         """This function should not use tf.function().
         It would create two unwanted effects:
@@ -98,3 +99,61 @@ def force_data_on_gpu_fn(data):
                 return tf.identity(data)
 
     return force_data_on_gpu_fn
+
+
+def patch_dali_dataset(dataset):
+    """Monkey-patch a `take(limit)` method onto the class of a `DALIDataset`.
+
+    Args:
+        dataset: The dataset to patch; must be an instance of
+            `nvidia.dali.plugin.tf.DALIDataset`.
+
+    Returns:
+        The same `dataset` object. Note that `take` is assigned on
+        `dataset.__class__`, so every instance of that class gains it.
+
+    Raises:
+        TypeError: If `dataset` is not a `DALIDataset`.
+    """
+    # Imported at call time rather than when this module is imported.
+    import nvidia.dali.plugin.tf as dali_tf
+
+    if not isinstance(dataset, dali_tf.DALIDataset):
+        raise TypeError(
+            "Dataset supplied should be an instance of `DALIDataset`. "
+            f"Received: `{type(dataset)}`"
+        )
+
+    def take(self, limit):
+        """Return an iterable yielding at most `limit` elements of `self`."""
+
+        class _Dataset(self.__class__):
+            """Wrapper that stops iterating `_ds` after `_limit` elements."""
+
+            def __init__(self, _ds, _limit):
+                # The parent constructor is not called here: only `_ds` and
+                # `_limit` are stored, and only `__iter__` is overridden.
+                self._ds = _ds
+                self._limit = _limit
+
+            def __iter__(self):
+                ds_iter = iter(self._ds)
+                for _ in tf.range(self._limit):
+                    # Pull exactly one element per step so that no more than
+                    # `_limit` elements are ever requested from `_ds`.
+                    try:
+                        data = next(ds_iter)
+                    except StopIteration:
+                        # `_ds` holds fewer than `_limit` elements. Letting
+                        # StopIteration escape a generator body would be
+                        # turned into a RuntimeError (PEP 479), so end the
+                        # iteration cleanly instead.
+                        return
+                    yield data
+
+        return _Dataset(self, limit)
+
+    # Monkey Patch
+    dataset.__class__.take = take
+
+    return dataset
diff --git a/tftrt/benchmarking-python/nvidia_examples/nnunet2d_tf2/infer.py b/tftrt/benchmarking-python/nvidia_examples/nnunet2d_tf2/infer.py
@@ -42,7 +42,7 @@
 
 from benchmark_args import BaseCommandLineAPI
 from benchmark_runner import BaseBenchmarkRunner
-from benchmark_utils import patch_dali_dataset
+from dataloading_utils import patch_dali_dataset
 
 
 class CommandLineAPI(BaseCommandLineAPI):
diff --git a/tftrt/benchmarking-python/nvidia_examples/nnunet3d_tf2/infer.py b/tftrt/benchmarking-python/nvidia_examples/nnunet3d_tf2/infer.py
@@ -42,7 +42,7 @@
 
 from benchmark_args import BaseCommandLineAPI
 from benchmark_runner import BaseBenchmarkRunner
-from benchmark_utils import patch_dali_dataset
+from dataloading_utils import patch_dali_dataset
 
 
 class CommandLineAPI(BaseCommandLineAPI):