tensorflow
diff --git a/‎tftrt/examples/benchmark_args.py‎
Lines changed: 23 additions & 0 deletions b/‎tftrt/examples/benchmark_args.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎tftrt/examples/benchmark_runner.py‎
Lines changed: 47 additions & 5 deletions b/‎tftrt/examples/benchmark_runner.py‎
Lines changed: 47 additions & 5 deletions
diff --git a/‎tftrt/examples/benchmark_utils.py‎
Lines changed: 32 additions & 0 deletions b/‎tftrt/examples/benchmark_utils.py‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎tftrt/examples/image_classification/base_run_inference.sh‎
Lines changed: 1 addition & 18 deletions b/‎tftrt/examples/image_classification/base_run_inference.sh‎
Lines changed: 1 addition & 18 deletions
diff --git a/‎tftrt/examples/image_classification/image_classification.py‎
Lines changed: 1 addition & 1 deletion b/‎tftrt/examples/image_classification/image_classification.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tftrt/examples/nvidia_examples/bert_tf2/analysis.txt‎
Lines changed: 136 additions & 0 deletions b/‎tftrt/examples/nvidia_examples/bert_tf2/analysis.txt‎
Lines changed: 136 additions & 0 deletions
@@ -133,13 +133,28 @@ def __init__(self):
             "least as large as the number of samples in the dataset."
         )
 
+        self._add_bool_argument(
+            name="no_tf32",
+            default=False,
+            required=False,
+            help="If set to True, the benchmark will force not using TF32."
+        )
+
         self._add_bool_argument(
             name="use_xla",
             default=False,
             required=False,
             help="If set to True, the benchmark will use XLA JIT Compilation."
         )
 
+        self._add_bool_argument(
+            name="use_xla_auto_jit",
+            default=False,
+            required=False,
+            help="If set to True, the benchmark will use XLA JIT Auto "
+            "Clustering Compilation."
+        )
+
         self._add_bool_argument(
             name="use_synthetic_data",
             default=False,
@@ -222,6 +237,14 @@ def __init__(self):
 
         # =========================== DEBUG Flags ========================== #
 
+        self._parser.add_argument(
+            "--export_metrics_json_path",
+            type=str,
+            default=None,
+            help="If set, the script will export runtime metrics and arguments "
+            "to the set location in JSON format for further processing."
+        )
+
         self._add_bool_argument(
             name="debug",
             default=False,
 
@@ -5,6 +5,8 @@
 import os
 
 import abc
+import copy
+import json
 import logging
 import sys
 import time
@@ -57,6 +59,14 @@ def evaluate_model(self, predictions, expected, bypass_data_to_eval):
     def __init__(self, args):
         self._args = args
 
+        if args.use_xla_auto_jit:
+            print("[Benchmark] - Activating XLA JIT Auto Clustering")
+            os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"
+
+        if args.no_tf32:
+            print("[Benchmark] - Deactivating the use of TF32 format")
+            os.environ["NVIDIA_TF32_OVERRIDE"] = "0"
+
         logging.getLogger("tensorflow").setLevel(logging.INFO)
         logging.disable(logging.WARNING)
 
@@ -96,6 +106,31 @@ def _debug_print(self, msg):
         if self._args.debug:
             print(f"[DEBUG] {msg}")
 
+    def _export_runtime_metrics_to_json(self, metric_dict):
+        """Write the benchmark metrics and runtime arguments to a JSON file.
+
+        Does nothing unless `export_metrics_json_path` is set on the parsed
+        arguments. The export is best effort: any failure is reported on
+        stdout and never interrupts the benchmark.
+
+        Args:
+            metric_dict: dict of metric names to values. It is stored under
+                the "results" key of the exported document, next to the
+                parsed arguments stored under "runtime_arguments".
+        """
+        json_path = self._args.export_metrics_json_path
+        if json_path is None:
+            # Nothing to export: skip the copy and the serialization.
+            return
+
+        def _to_serializable(obj):
+            # Fallback for values the JSON encoder cannot handle natively.
+            # Objects exposing `tolist()` (e.g. NumPy scalars and arrays)
+            # usually have no `__dict__`, so they are converted first.
+            if hasattr(obj, "tolist"):
+                return obj.tolist()
+            if hasattr(obj, "__dict__"):
+                return obj.__dict__
+            raise TypeError(
+                f"Object of type {type(obj).__name__} is not JSON "
+                "serializable"
+            )
+
+        # Deliberately broad `except`: exporting is optional and must never
+        # make an otherwise successful benchmark run fail.
+        try:
+            payload = {
+                # Creating a copy to avoid modifying the original
+                "results": copy.deepcopy(metric_dict),
+                "runtime_arguments": vars(self._args)
+            }
+
+            # Serialize before opening the file: opening in 'w' mode
+            # truncates it, so a serialization failure would otherwise leave
+            # an empty file behind (and destroy a previous export).
+            json_string = json.dumps(
+                payload,
+                default=_to_serializable,
+                sort_keys=True,
+                indent=4
+            )
+
+            with open(json_path, 'w') as json_f:
+                print(json_string, file=json_f)
+        except Exception as e:
+            print(
+                "[ERROR] Impossible to save JSON File at path: "
+                f"{json_path}.\nError: {str(e)}"
+            )
+
     def _get_graph_func(self):
         """Retreives a frozen SavedModel and applies TF-TRT
         use_tftrt: bool, if true use TensorRT
@@ -381,16 +416,15 @@ def log_step(step_idx, display_every, iter_time):
 
         with timed_section("Metric Computation"):
 
+            metrics = dict()
+
             if not self._args.use_synthetic_data:
                 metric, metric_units = self.evaluate_model(
                     data_aggregator.predicted_dict,
                     data_aggregator.expected_dict, bypass_data_to_eval
                 )
-                print(f"- {metric_units:35s}: {metric:.2f}")
-
-            metrics = dict()
+                metrics["Metric"] = {metric_units: metric}
 
-            if not self._args.use_synthetic_data:
                 metrics["Total Samples Processed"] = (
                     data_aggregator.total_samples_processed
                 )
@@ -411,10 +445,18 @@ def log_step(step_idx, display_every, iter_time):
             metrics['GPU Latency Min (ms)'] = np.min(run_times) * 1000
             metrics['GPU Latency Max (ms)'] = np.max(run_times) * 1000
 
-            for key, val in sorted(metrics.items()):
+            self._export_runtime_metrics_to_json(metrics)
+
+            def log_value(key, val):
                 if isinstance(val, int):
                     print(f"- {key:35s}: {val}")
                 else:
                     print(f"- {key:35s}: {val:.2f}")
 
+            for key, val in sorted(metrics.items()):
+                if isinstance(val, dict):
+                    log_value(*list(val.items())[0])
+                else:
+                    log_value(key, val)
+
         print()  # visual spacing
@@ -190,3 +190,35 @@ def aggregate_data(self, y_pred, y):
                     self._predicted[key][idx_start:idx_stop] = y_pred[key]
                 for key, val in self._expected.items():
                     self._expected[key][idx_start:idx_stop] = y[key]
+
+
+def patch_dali_dataset(dataset):
+    """Add a `take(limit)` method to the class of a `DALIDataset` instance.
+
+    The method is attached to `dataset.__class__`, so every instance of that
+    class gains it, not only the one passed in.
+
+    Args:
+        dataset: an instance of `nvidia.dali.plugin.tf.DALIDataset`.
+
+    Returns:
+        The same `dataset` object that was passed in.
+
+    Raises:
+        TypeError: if `dataset` is not a `DALIDataset` instance.
+    """
+    # Imported lazily so this module can be loaded without DALI installed.
+    import nvidia.dali.plugin.tf as dali_tf
+
+    if not isinstance(dataset, dali_tf.DALIDataset):
+        raise TypeError(
+            "Dataset supplied should be an instance of `DALIDataset`. "
+            f"Received: `{type(dataset)}`"
+        )
+
+    def take(self, limit):
+        """Return an iterable yielding at most `limit` elements of `self`."""
+
+        class _Dataset(self.__class__):
+            # Subclasses the patched class but intentionally does not call
+            # the parent `__init__`: it only wraps an existing dataset and
+            # limits its iteration.
+
+            def __init__(self, _ds, _limit):
+                self._ds = _ds
+                self._limit = _limit
+
+            def __iter__(self):
+                if self._limit <= 0:
+                    return
+                for count, data in enumerate(self._ds, start=1):
+                    yield data
+                    # Stop right after the last requested element so that no
+                    # extra element is pulled from the wrapped dataset.
+                    if count >= self._limit:
+                        return
+
+        return _Dataset(self, limit)
+
+    # Monkey Patch
+    dataset.__class__.take = take
+
+    return dataset
@@ -8,10 +8,7 @@ DATA_DIR=""
 MODEL_DIR=""
 
 # Default Argument Values
-NVIDIA_TF32_OVERRIDE=""
-
 BYPASS_ARGUMENTS=""
-TF_AUTO_JIT_XLA_FLAG=""
 
 # Loop through arguments and process them
 for arg in "$@"
@@ -21,10 +18,6 @@ do
         MODEL_NAME="${arg#*=}"
         shift # Remove --model_name from processing
         ;;
-        --no_tf32)
-        NVIDIA_TF32_OVERRIDE="NVIDIA_TF32_OVERRIDE=0"
-        shift # Remove --no_tf32 from processing
-        ;;
         --data_dir=*)
         DATA_DIR="${arg#*=}"
         shift # Remove --data_dir= from processing
@@ -39,10 +32,6 @@ do
         --output_tensors_name=*)
         shift # Remove --output_tensors_name= from processing
         ;;
-        --use_xla_auto_jit)
-        TF_AUTO_JIT_XLA_FLAG="TF_XLA_FLAGS=\"--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit\""
-        shift # Remove --use_xla_auto_jit from processing
-        ;;
         *)
         BYPASS_ARGUMENTS=" ${BYPASS_ARGUMENTS} ${arg}"
         ;;
@@ -105,16 +94,13 @@ echo ""
 echo "[*] DATA_DIR: ${DATA_DIR}"
 echo "[*] MODEL_DIR: ${MODEL_DIR}"
 echo ""
-echo "[*] NVIDIA_TF32_OVERRIDE: ${NVIDIA_TF32_OVERRIDE}"
-echo ""
 # Custom Image Classification Task Flags
 echo "[*] INPUT_SIZE: ${INPUT_SIZE}"
 echo "[*] PREPROCESS_METHOD: ${PREPROCESS_METHOD}"
 echo "[*] NUM_CLASSES: ${NUM_CLASSES}"
 echo "[*] MAX_SAMPLES: ${MAX_SAMPLES}"
 echo "[*] OUTPUT_TENSORS_NAME: ${OUTPUT_TENSORS_NAME}"
 echo ""
-echo "[*] TF_AUTO_JIT_XLA_FLAG: ${TF_AUTO_JIT_XLA_FLAG}"
 echo "[*] BYPASS_ARGUMENTS: $(echo \"${BYPASS_ARGUMENTS}\" | tr -s ' ')"
 echo -e "********************************************************************\n"
 
@@ -157,10 +143,7 @@ BENCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd ${BENCH_DIR}
 
 # Execute the example
-
-PREPEND_COMMAND="${TF_AUTO_JIT_XLA_FLAG} ${NVIDIA_TF32_OVERRIDE}"
-
-COMMAND="${PREPEND_COMMAND} python image_classification.py \
+COMMAND="python image_classification.py \
     --data_dir ${DATA_DIR} \
     --calib_data_dir ${DATA_DIR} \
     --input_saved_model_dir ${INPUT_SAVED_MODEL_DIR} \
 
@@ -219,7 +219,7 @@ def evaluate_model(self, predictions, expected, bypass_data_to_eval):
         This computes overall accuracy, mAP,  etc.  Returns the
         metric value and a metric_units string naming the metric.
 
-        Note: script arguments can be accessed using `args.attr`
+        Note: script arguments can be accessed using `self._args.attr`
         """
 
         return (
 
@@ -0,0 +1,136 @@
+
+MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
+
+signature_def['__saved_model_init_op']:
+  The given SavedModel SignatureDef contains the following input(s):
+  The given SavedModel SignatureDef contains the following output(s):
+    outputs['__saved_model_init_op'] tensor_info:
+        dtype: DT_INVALID
+        shape: unknown_rank
+        name: NoOp
+  Method name is: 
+
+signature_def['serving_default']:
+  The given SavedModel SignatureDef contains the following input(s):
+    inputs['input_mask'] tensor_info:
+        dtype: DT_INT32
+        shape: (-1, 384)
+        name: serving_default_input_mask:0
+    inputs['input_type_ids'] tensor_info:
+        dtype: DT_INT32
+        shape: (-1, 384)
+        name: serving_default_input_type_ids:0
+    inputs['input_word_ids'] tensor_info:
+        dtype: DT_INT32
+        shape: (-1, 384)
+        name: serving_default_input_word_ids:0
+  The given SavedModel SignatureDef contains the following output(s):
+    outputs['end_positions'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 384)
+        name: StatefulPartitionedCall:0
+    outputs['start_positions'] tensor_info:
+        dtype: DT_FLOAT
+        shape: (-1, 384)
+        name: StatefulPartitionedCall:1
+  Method name is: tensorflow/serving/predict
+
+Defined Functions:
+  Function Name: '__call__'
+    Option #1
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
+        Argument #2
+          DType: bool
+          Value: True
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #2
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
+        Argument #2
+          DType: bool
+          Value: True
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #3
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
+        Argument #2
+          DType: bool
+          Value: False
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #4
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
+        Argument #2
+          DType: bool
+          Value: False
+        Argument #3
+          DType: NoneType
+          Value: None
+
+  Function Name: '_default_save_signature'
+    Option #1
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
+
+  Function Name: 'call_and_return_all_conditional_losses'
+    Option #1
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
+        Argument #2
+          DType: bool
+          Value: True
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #2
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/0'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/1'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='inputs/2'), ]
+        Argument #2
+          DType: bool
+          Value: False
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #3
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
+        Argument #2
+          DType: bool
+          Value: True
+        Argument #3
+          DType: NoneType
+          Value: None
+    Option #4
+      Callable with:
+        Argument #1
+          DType: list
+          Value: [TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_word_ids'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_mask'), TensorSpec(shape=(None, 384), dtype=tf.int32, name='input_type_ids'), ]
+        Argument #2
+          DType: bool
+          Value: False
+        Argument #3
+          DType: NoneType
+          Value: None